Python SeqGen Examples

Programming Language: Python

Namespace/Package Name: dendropy.interop.seqgen

Method/Function: SeqGen

Examples at hotexamples.com: 8

Python SeqGen - 8 examples found. These are the top-rated real-world Python examples of dendropy.interop.seqgen.SeqGen, extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: seqgen.py Project: rackerm4/PsmTreeToSeq-nf

def seqgen_to_file(files, seqgen_vals):
    """
    Simulate sequences with Seq-Gen for each tree file and write them out.

    For every path in ``files`` the tree is read using the schema named by
    the file's final extension, sequences are generated with branch lengths
    scaled by 0.1, and the first resulting character matrix is written (in
    that same schema) to ``seq_<stem>.<schema>`` in the input file's
    directory.

    :param files: iterable of tree file paths of the form ``<stem>.<schema>``.
    :param seqgen_vals: dict; every value is replaced in place by
        ``seqgen.SeqGen(value)``.
    """
    import os

    s = seqgen.SeqGen()
    s.scale_branch_lens = 0.1
    # NOTE(review): the SeqGen objects stored back into the caller's dict are
    # not used for the generation below -- confirm this is intended.
    for k, v in seqgen_vals.items():
        seqgen_vals[k] = seqgen.SeqGen(v)
    for path in files:
        # Splitting on the *last* dot of the base name keeps paths such as
        # "./trees/a.nexus" or "run.1.nexus" from yielding a bogus schema,
        # which a plain ``path.split('.')`` would.
        directory, basename = os.path.split(path)
        stem, ext = os.path.splitext(basename)
        schema = ext[1:]
        trees = dendropy.Tree.get(path=path, schema=schema)
        filename = os.path.join(directory, "seq_{}.{}".format(stem, schema))
        d1 = s.generate(trees)
        with open(filename, "w") as f:
            f.write(d1.char_matrices[0].as_string(schema))

Example #2

Show file

File: popgensim.py Project: wook2014/DendroPy

    def generate_sequences(self,
                           species_name,
                           samples_per_pop=10,
                           seq_len=2000,
                           use_seq_gen=True):
        """
        Build the population and gene trees, then simulate sequences on
        ``self.mutation_tree`` and return them in a ``dendropy.DataSet``.

        When ``self.use_seq_gen`` is ``True`` the sequences come from Seq-Gen
        (HKY model, one replicate); otherwise they come from
        ``discrete.hky85_chars``.

        NOTE(review): the ``use_seq_gen`` argument is not consulted; the
        branch is chosen from ``self.use_seq_gen``.
        """
        tree_kwargs = dict(species_name=species_name,
                           samples_per_pop=samples_per_pop)
        self.generate_pop_tree(**tree_kwargs)
        self.generate_gene_tree(**tree_kwargs)

        dataset = dendropy.DataSet(self.mutation_tree.taxon_namespace)

        if self.use_seq_gen is not True:
            # Built-in simulator path: fixed rate, kappa and base frequencies.
            simulated = discrete.hky85_chars(
                seq_len=seq_len,
                tree_model=self.mutation_tree,
                mutation_rate=1.0,
                kappa=1.0,
                base_freqs=[0.25, 0.25, 0.25, 0.25],
                root_states=None,
                rng=self.rng)
            dataset.add_char_matrix(simulated)
            return dataset

        # Seq-Gen path: configure the wrapper from this object's settings.
        simulator = seqgen.SeqGen()
        simulator.seqgen_path = self.seqgen_path
        simulator.num_replicates = 1
        simulator.quiet = True
        simulator.rng = self.rng
        simulator.seq_len = seq_len
        simulator.char_model = 'HKY'
        simulator.ti_tv = float(self.kappa) / 2
        simulator.state_freqs = self.base_freqs
        simulator.trees = [self.mutation_tree]
        return simulator.generate_dataset(dataset=dataset)

Example #3

Show file

def simulate_gtr_matrix(tree, seq_length, frequencies, rates, branch_scale):
    """
    Simulate sequences on ``tree`` with Seq-Gen under a GTR model and return
    the first generated character matrix as a FASTA string.

    :param tree: tree passed to ``SeqGen.generate``.
    :param seq_length: value assigned to the generator's ``seq_len``.
    :param frequencies: value assigned to ``state_freqs``.
    :param rates: value assigned to ``general_rates``.
    :param branch_scale: value assigned to ``scale_branch_lens``.
    """
    generator = seqgen.SeqGen()
    settings = (
        ("char_model", seqgen.SeqGen.GTR),
        ("state_freqs", frequencies),
        ("general_rates", rates),
        ("scale_branch_lens", branch_scale),
        ("seq_len", seq_length),
    )
    for attr_name, attr_value in settings:
        setattr(generator, attr_name, attr_value)
    dataset = generator.generate(tree)
    return dataset.char_matrices[0].as_string('fasta')

Example #4

Show file

#! /usr/bin/env python
# -*- coding: utf-8 -*-

import dendropy
from dendropy.interop import seqgen

# Read the input trees and create a Seq-Gen wrapper with default settings.
trees = dendropy.TreeList.get(path="pythonidae.mcmc.nex", schema="nexus")
s = seqgen.SeqGen()

# generate one alignment per tree
# as substitution model is not specified, defaults to a JC model
# will result in a DataSet object with one DnaCharacterMatrix per input tree
d0 = s.generate(trees)
print(len(d0.char_matrices))
print(d0.char_matrices[0].as_string("nexus"))

# instruct Seq-Gen to scale branch lengths by factor of 0.1
# note that this does not modify the input trees
s.scale_branch_lens = 0.1

# more complex model
s.char_model = seqgen.SeqGen.GTR
s.state_freqs = [0.4, 0.4, 0.1, 0.1]
s.general_rates = [0.8, 0.4, 0.4, 0.2, 0.2, 0.1]
d1 = s.generate(trees)
# report the GTR result just generated (d1), not the earlier d0
print(len(d1.char_matrices))
print(d1.char_matrices[0].as_string("nexus"))

Example #5

Show file

File: spdw-build-bpp-jobs.py Project: jeetsukumaran/spdw

def main():
    """
    Generate BPP job files for each source tree file.

    Source tree paths come from the command line; if '-' is among them,
    further paths are read from standard input, one per line. For each path
    the tree is loaded, gene trees are produced by
    ``generate_contained_trees``, sequences are generated with Seq-Gen, and
    these files (all prefixed with the job title) are written: an imap file,
    a PHYLIP character file, a BPP control file and an SGE job script.
    Finally a CSV manifest ``<title>_manifest.csv`` with one row per job is
    written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("source_trees",
            metavar="SOURCE_TREEFILE [SOURCE_TREEFILE [SOURCE_TREEFILE]]",
            nargs="+",
            help="Path to containing tree files. Specify '-' to read from standard input.")
    parser.add_argument("-f", "--input-format",
            default="nexus",
            dest="schema",
            # argparse interpolates '%(default)s'; '$(default)s' is printed literally.
            help="Input trees format (default: %(default)s).")
    parser.add_argument("-z", "--random-seed",
            type=int,
            default=None,
            help="Seed for random number generator engine.")
    parser.add_argument("-t", "--title",
            default="bpprun",
            help="Run title (default: '%(default)s')")
    data_options = parser.add_argument_group("Data Options")
    # NOTE(review): the default is a float although type=int; argparse does
    # not convert non-string defaults, so the default stays 1.0.
    data_options.add_argument("--population-size",
            type=int,
            default=1.0,
            help="Population size (default: %(default)s).")
    data_options.add_argument("--num-individuals-per-population",
            type=int,
            default=4,
            help="Number of individuals sampled per incipient species lineage (default: %(default)s).")
    data_options.add_argument("--num-loci-per-individual",
            type=int,
            default=10,
            help="Number of loci sampled per individual (default: %(default)s).")
    data_options.add_argument("--num-characters-per-locus",
            type=int,
            default=1000,
            help="Number of characters sampled per locus (default: %(default)s).")
    data_options.add_argument("--mutation-rate-per-site",
            type=float,
            default=1e-8,
            help="Per-site mutation rate (default: %(default)s).")
    parser.add_argument("--no-scale-tree-by-mutation-rate",
            action="store_true",
            help="Do not scale tree by mutation rate.")
    args = parser.parse_args()

    if args.random_seed is None:
        random_seed = random.randint(0, sys.maxsize-1)
    else:
        random_seed = args.random_seed
    rng = random.Random(random_seed)
    _log("Random seed: {}".format(random_seed))

    sg = seqgen.SeqGen()
    sg.seq_len = args.num_characters_per_locus
    sg.scale_branch_lens = args.mutation_rate_per_site

    if "-" in args.source_trees:
        # Skip blank lines (e.g. the empty string after a trailing newline);
        # they are not file paths.
        filepaths = [line for line in sys.stdin.read().split("\n") if line.strip()]
        args.source_trees.remove("-")
    else:
        filepaths = []
    filepaths.extend(args.source_trees)
    if not filepaths:
        sys.exit("No source tree files specified")

    # Tree annotations copied into the manifest (coerced to float when possible).
    float_annotation_keys = (
            "speciation_initiation_from_orthospecies_rate",
            "speciation_initiation_from_incipient_species_rate",
            "speciation_completion_rate",
            "orthospecies_extinction_rate",
            "incipient_species_extinction_rate",
            "max_time",
            "max_extant_orthospecies",
            "num_extant_lineages",
            "num_extant_orthospecies",
            )

    manifest_entries = []
    for idx, filepath in enumerate(filepaths):
        job_title = "{}_{:05d}".format(args.title, idx+1)
        manifest_entry = collections.OrderedDict()
        _log("{} of {}: {}: {}".format(idx+1, len(filepaths), job_title, filepath))
        source_tree = dendropy.Tree.get(
                path=filepath,
                schema=args.schema,
                extract_comment_metadata=True,
                preserve_underscores=True,
                )

        # Insertion order matters: the first entry's keys become the CSV header.
        for key in float_annotation_keys:
            manifest_entry[key] = try_to_coerce_to_float(source_tree.annotations[key].value)
        manifest_entry["source_tree_type"] = source_tree.annotations["tree_type"].value
        manifest_entry["population_size"] = args.population_size
        manifest_entry["num_individuals_per_population"] = args.num_individuals_per_population
        manifest_entry["num_loci_per_individual"] = args.num_loci_per_individual
        manifest_entry["mutation_rate_per_site"] = args.mutation_rate_per_site

        source_tree.calc_node_ages()

        gene_trees = generate_contained_trees(
                containing_tree=source_tree,
                num_individuals_per_population=args.num_individuals_per_population,
                num_gene_trees=args.num_loci_per_individual,
                population_size=args.population_size,
                rng=rng,
                )

        imap_filepath = "{}.input.imap.txt".format(job_title)
        with open(imap_filepath, "w") as f:
            for taxon in gene_trees.taxon_namespace:
                f.write("{}    {}\n".format(taxon.label.split("^")[1], taxon.population_label))
            f.write("\n")

        d0 = sg.generate(gene_trees)
        chars_filepath = "{}.input.chars.txt".format(job_title)
        with open(chars_filepath, "w") as f:
            for cm_idx, cm in enumerate(d0.char_matrices):
                sys.stderr.write("Locus {}: pi = {}, Tajima's D = {}\n".format(
                    cm_idx+1,
                    popgenstat.nucleotide_diversity(cm),
                    popgenstat.tajimas_d(cm)))
                cm.write(file=f, schema="phylip")
                f.write("\n")

        out_filepath = "{}.results.out.txt".format(job_title)
        mcmc_filepath = "{}.results.mcmc.txt".format(job_title)
        num_species = len(source_tree.taxon_namespace)
        species_labels = " ".join(t.label for t in source_tree.taxon_namespace)
        num_individuals_per_species = " ".join(
                [str(args.num_individuals_per_population)] * num_species)

        # Inverse Gamma Prior
        # IG(a,b), with mean given by b/(a-1)
        # So,
        #   thetaprior 3 0.002
        # has a mean of
        #   0.002/(3-1) = 0.001
        theta_prior_mean = args.population_size * 4 * args.mutation_rate_per_site
        theta_prior_a = 3.0
        theta_prior_b = theta_prior_mean * (theta_prior_a - 1)
        if args.no_scale_tree_by_mutation_rate:
            tau_prior_mean = source_tree.seed_node.age
        else:
            tau_prior_mean = source_tree.seed_node.age * args.mutation_rate_per_site * (1.0 / (args.num_loci_per_individual * args.num_characters_per_locus))
        tau_prior_a = 3.0
        tau_prior_b = tau_prior_mean * (tau_prior_a - 1)

        # Count of species, not len() of the space-joined label string
        # (which would be a character count).
        manifest_entry["num_input_lineages"] = num_species
        manifest_entry["theta"] = theta_prior_mean
        manifest_entry["theta_prior_a"] = theta_prior_a
        manifest_entry["theta_prior_b"] = theta_prior_b
        manifest_entry["root_age"] = source_tree.seed_node.age
        manifest_entry["tau_prior_a"] = tau_prior_a
        manifest_entry["tau_prior_b"] = tau_prior_b

        species_tree = source_tree.as_string(
                schema="newick",
                suppress_leaf_taxon_labels=False,
                suppress_leaf_node_labels=True,
                suppress_internal_taxon_labels=True,
                suppress_internal_node_labels=True,
                suppress_rooting=True,
                suppress_edge_lengths=True,
                unquoted_underscores=True,
                preserve_spaces=True,
                store_tree_weights=False,
                suppress_annotations=True,
                suppress_item_comments=True,
                )
        bpp_config = BPP_TEMPLATE.format(
                chars_filepath=chars_filepath,
                imap_filepath=imap_filepath,
                out_filepath=out_filepath,
                mcmc_filepath=mcmc_filepath,
                num_species=num_species,
                species_labels=species_labels,
                num_individuals_per_species=num_individuals_per_species,
                species_tree=species_tree,
                theta_prior_mean=theta_prior_mean,
                theta_prior_a=theta_prior_a,
                theta_prior_b=theta_prior_b,
                tau_prior_mean=tau_prior_mean,
                tau_prior_a=tau_prior_a,
                tau_prior_b=tau_prior_b,
                num_loci=args.num_loci_per_individual,
                root_age=source_tree.seed_node.age
                )
        bpp_ctl_filepath = "{}.input.bpp.ctl".format(job_title)
        with open(bpp_ctl_filepath, "w") as f:
            f.write(bpp_config)
            f.write("\n")

        with open("{}.job.sge".format(job_title), "w") as jobf:
            jobf.write("#! /bin/bash\n")
            jobf.write("#$ -cwd\n")
            jobf.write("#$ -V\n")
            jobf.write("#$ -S /bin/bash\n")
            jobf.write("#$ -l h_vmem=12G\n")
            jobf.write("#$ -l virtual_free=12G\n")
            jobf.write("bpp --cfile {}\n".format(bpp_ctl_filepath))

        manifest_entry["source_tree_path"] = filepath
        manifest_entry["results_filepath"] = out_filepath
        manifest_entry["mcmc_filepath"] = mcmc_filepath
        manifest_entries.append(manifest_entry)

    out = _open_output_file_for_csv_writer(
            filepath="{}_manifest.csv".format(args.title),
            append=False)
    with out:
        writer = csv.DictWriter(
                out,
                fieldnames=manifest_entries[0].keys(),
                restval="NA",
                delimiter=",",
                lineterminator=os.linesep,
                )
        writer.writeheader()
        writer.writerows(manifest_entries)

Example #6

Show file

File: test_tree.py Project: jeetsukumaran/msc-sdp-PNAS

def main():
    """
    Run replicate protracted-speciation simulations and log their setup.

    Parses model, termination, data and run options; exits unless at least
    one termination condition is given. For each replicate a lineage tree
    and an orthospecies tree are sampled, labelled, written to ``x1.nexus``
    and ``x2.nexus`` (overwritten on every replicate), and a
    ``<job_title>.setup.log`` file describing the trees and all parameters
    is written.
    """
    parser = argparse.ArgumentParser()
    parameter_options = parser.add_argument_group("Model Parameters")
    parameter_options.add_argument("--b1", "--speciation_initiation_from_orthospecies_rate",
            type=float,
            dest="speciation_initiation_from_orthospecies_rate",
            default=1.0,
            help="Rate at which orthospecies give rise to new incipient species [default: %(default)s].")
    parameter_options.add_argument("--b2", "--speciation_initiation_from_incipient_species_rate",
            type=float,
            dest="speciation_initiation_from_incipient_species_rate",
            default=1.0,
            help="Rate at which incipient species give rise to new incipient species [default: %(default)s].")
    parameter_options.add_argument("--c1", "--speciation-completion-rate",
            type=float,
            dest="speciation_completion_rate",
            default=1.0,
            help="Rate at which incipient species become orthospecies [default: %(default)s].")
    parameter_options.add_argument("--e1", "--orthospecies-extinction-rate",
            type=float,
            dest="orthospecies_extinction_rate",
            default=1.0,
            help="Rate at which orthospecies go extinct [default: %(default)s].")
    parameter_options.add_argument("--e2", "--incipient-species-extinction-rate",
            type=float,
            dest="incipient_species_extinction_rate",
            default=1.0,
            help="Rate at which incipient species go extinct [default: %(default)s].")
    termination_options = parser.add_argument_group("Simulation Termination Conditions")
    termination_options.add_argument("--max-time",
            type=float,
            default=None,
            help="Maximum length of time to to run (default: %(default)s).")
    termination_options.add_argument("--max-extant-orthospecies",
            type=int,
            default=None,
            help="Maximum number of orthospecies to generate (default: %(default)s).")
    termination_options.add_argument("--max-extant-lineages",
            type=int,
            default=None,
            help="Maximum number of lineages to generate (default: %(default)s).")
    data_options = parser.add_argument_group("Data Options")
    data_options.add_argument("--population-size",
            type=int,
            default=10000,
            help="Population size (default: %(default)s).")
    data_options.add_argument("--num-individuals-per-population",
            type=int,
            default=4,
            help="Number of individuals sampled per incipient species lineage (default: %(default)s).")
    data_options.add_argument("--num-loci-per-individual",
            type=int,
            default=10,
            help="Number of loci sampled per individual (default: %(default)s).")
    data_options.add_argument("--num-characters-per-locus",
            type=int,
            default=10,
            help="Number of characters sampled per locus (default: %(default)s).")
    data_options.add_argument("--mutation-rate-per-site",
            type=float,
            default=0.00001,
            help="Per-site mutation rate (default: %(default)s).")
    run_options = parser.add_argument_group("Run Options")
    run_options.add_argument("-t", "--run-title",
            default="run",
            help="Run title (default: '%(default)s')")
    run_options.add_argument("-n", "--nreps",
            type=int,
            default=10,
            help="Number of replicates (default: %(default)s).")
    run_options.add_argument("-z", "--random-seed",
            type=int,
            default=None,
            help="Seed for random number generator engine.")
    args = parser.parse_args()

    if not args.max_time and not args.max_extant_orthospecies and not args.max_extant_lineages:
        sys.exit("Need to specify termination condition, at least one of: '--max-time', '--max-extant-orthospecies', '--max-extant-lineages'")
    if args.random_seed is None:
        # sys.maxsize exists on both Python 2.6+ and Python 3;
        # sys.maxint was removed in Python 3.
        random_seed = random.randint(0, sys.maxsize-1)
    else:
        random_seed = args.random_seed

    _log("Random seed: {}".format(random_seed))
    rng = random.Random(random_seed)
    psm = protractedspeciation.ProtractedSpeciationProcess(
            speciation_initiation_from_orthospecies_rate=args.speciation_initiation_from_orthospecies_rate,
            orthospecies_extinction_rate=args.orthospecies_extinction_rate,
            speciation_initiation_from_incipient_species_rate=args.speciation_initiation_from_incipient_species_rate,
            speciation_completion_rate=args.speciation_completion_rate,
            incipient_species_extinction_rate=args.incipient_species_extinction_rate,
            rng=rng,)
    sg = seqgen.SeqGen()
    # The SeqGen option is spelled 'scale_branch_lens'; assigning to
    # 'scale_branch_lengths' only creates an unused attribute.
    sg.scale_branch_lens = args.mutation_rate_per_site
    for rep in range(args.nreps):
        job_title = "{}_{:03d}".format(args.run_title, rep+1)
        _log("Replicate {} of {}: {}".format(rep+1, args.nreps, job_title))
        lineage_tree, orthospecies_tree = psm.generate_sample(
                max_time=args.max_time,
                max_extant_orthospecies=args.max_extant_orthospecies,
                max_extant_lineages=args.max_extant_lineages,
                is_initial_lineage_orthospecies=True,
                )
        label_lineage_tree(lineage_tree)
        label_orthospecies_tree(orthospecies_tree)
        lineage_tree.write(path="x1.nexus", schema="nexus")
        orthospecies_tree.write(path="x2.nexus", schema="nexus")

        # Context manager so each replicate's log is flushed and closed
        # before the next replicate starts.
        with open("{}.setup.log".format(job_title), "w") as logf:
            logf.write("-  Replicate {} of {} generated by command: {}\n".format(rep+1, args.nreps, " ".join(sys.argv)))
            logf.write("\n")
            logf.write("-  Random seed used: {}\n".format(random_seed))
            logf.write("\n")
            describe_tree(logf, lineage_tree, "-  Lineage Tree Profile")
            logf.write("\n")
            describe_tree(logf, orthospecies_tree, "-  Orthospecies Tree Profile")
            logf.write("\n")
            logf.write("-  Protracted Speciation Model Parameters\n")
            logf.write("   -       Speciation initiation from orthospecies rate: {}\n".format(args.speciation_initiation_from_orthospecies_rate))
            logf.write("   -  Speciation initiation from incipient species rate: {}\n".format(args.speciation_initiation_from_incipient_species_rate))
            logf.write("   -                         Speciation completion rate: {}\n".format(args.speciation_completion_rate))
            logf.write("   -                       Orthospecies extinction rate: {}\n".format(args.orthospecies_extinction_rate))
            logf.write("   -                  Incipient species extinction rate: {}\n".format(args.incipient_species_extinction_rate))
            logf.write("   -               Termination: Maximum simulation time: {}\n".format(args.max_time))
            logf.write("   -                  Termination: Maximum orthospecies: {}\n".format(args.max_extant_orthospecies))
            logf.write("   -                      Termination: Maximum lineages: {}\n".format(args.max_extant_lineages))
            logf.write("\n")
            logf.write("-  Data Generation Parameters\n")
            logf.write("   -                                    Population size: {}\n".format(args.population_size))
            logf.write("   -                 Individuals per species/population: {}\n".format(args.num_individuals_per_population))
            logf.write("   -                      Number of loci per individual: {}\n".format(args.num_loci_per_individual))
            logf.write("   -                             Per-site mutation rate: {}\n".format(args.mutation_rate_per_site))
            logf.write("\n")

Example #7

Show file

def main():
    """
    Generate BPP job files for each readable source tree file.

    Source tree paths come from the command line; if '-' is among them,
    further paths are read from standard input, one per line. Files that
    fail to load are logged and skipped. For each loaded tree,
    ``generate_contained_trees`` returns a containing tree and gene trees;
    sequences are generated with Seq-Gen, and an imap file, a PHYLIP
    character file, a BPP control file and an SGE job script (all prefixed
    with the job title) are written. A manifest entry is collected per job.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("source_trees",
            metavar="SOURCE_TREEFILE [SOURCE_TREEFILE [SOURCE_TREEFILE]]",
            nargs="+",
            help="Path to source of tree files. Specify '-' to read from standard input.")
    parser.add_argument("-f", "--input-format",
            default="nexus",
            dest="schema",
            # argparse interpolates '%(default)s'; '$(default)s' is printed literally.
            help="Input trees format (default: %(default)s).")
    parser.add_argument("-z", "--random-seed",
            type=int,
            default=None,
            help="Seed for random number generator engine.")
    parser.add_argument("-t", "--title",
            default="bpprun",
            help="Run title (default: '%(default)s')")
    data_options = parser.add_argument_group("Data Options")
    data_options.add_argument("--population-size",
            type=int,
            default=10000,
            help="Population size (default: %(default)s).")
    data_options.add_argument("--total-number-of-individuals",
            type=int,
            default=200,
            help="Number of individuals sampled across all populations (default: %(default)s).")
    data_options.add_argument("--num-loci-per-individual",
            type=int,
            default=10,
            help="Number of loci sampled per individual (default: %(default)s).")
    data_options.add_argument("--num-characters-per-locus",
            type=int,
            default=1000,
            help="Number of characters sampled per locus (default: %(default)s).")
    data_options.add_argument("--mutation-rate-per-site",
            type=float,
            default=0.00001,
            help="Per-site mutation rate (default: %(default)s).")
    args = parser.parse_args()

    if args.random_seed is None:
        # sys.maxsize exists on both Python 2.6+ and Python 3;
        # sys.maxint was removed in Python 3.
        random_seed = random.randint(0, sys.maxsize-1)
    else:
        random_seed = args.random_seed
    rng = random.Random(random_seed)
    s00.log("Random seed: {}".format(random_seed))

    sg = seqgen.SeqGen()
    # The SeqGen option is spelled 'scale_branch_lens'; assigning to
    # 'scale_branch_lengths' only creates an unused attribute.
    sg.scale_branch_lens = args.mutation_rate_per_site

    if "-" in args.source_trees:
        filepaths = sys.stdin.read().split("\n")
        args.source_trees.remove("-")
    else:
        filepaths = []

    # Tree annotations copied into the manifest (coerced to float when possible).
    float_annotation_keys = (
            "speciation_initiation_from_orthospecies_rate",
            "speciation_initiation_from_incipient_species_rate",
            "speciation_completion_rate",
            "orthospecies_extinction_rate",
            "incipient_species_extinction_rate",
            "max_time",
            "max_extant_orthospecies",
            "num_extant_lineages",
            "num_extant_orthospecies",
            )

    manifest_entries = []
    filepaths.extend(args.source_trees)
    for idx, filepath in enumerate(filepaths):
        job_title = "{}_{:05d}".format(args.title, idx+1)
        manifest_entry = collections.OrderedDict()
        s00.log("{} of {}: {}: {}".format(idx+1, len(filepaths), job_title, filepath))
        try:
            source_tree = dendropy.Tree.get(
                    path=filepath,
                    schema=args.schema,
                    extract_comment_metadata=True,
                    preserve_underscores=True,
                    )
        # A tuple catches either exception type. The old comma form
        # ("except OSError, dendropy.DataError:") bound the caught OSError to
        # dendropy.DataError on Python 2 and is a syntax error on Python 3.
        # NOTE(review): confirm that dendropy exposes DataError at package level.
        except (OSError, dendropy.DataError):
            s00.log("Skipping failed file: {}".format(filepath))
            continue

        for key in float_annotation_keys:
            manifest_entry[key] = try_to_coerce_to_float(source_tree.annotations[key].value)
        manifest_entry["source_tree_type"] = source_tree.annotations["tree_type"].value
        manifest_entry["population_size"] = args.population_size
        manifest_entry["total_number_of_individuals"] = args.total_number_of_individuals
        manifest_entry["num_loci_per_individual"] = args.num_loci_per_individual
        manifest_entry["mutation_rate_per_site"] = args.mutation_rate_per_site

        source_tree.calc_node_ages()
        original_containing_tree_num_species = len(source_tree.taxon_namespace)
        original_containing_tree_species_labels = " ".join(t.label for t in source_tree.taxon_namespace)

        containing_tree, gene_trees = generate_contained_trees(
                containing_tree=source_tree,
                total_number_of_individuals=args.total_number_of_individuals,
                num_gene_trees=args.num_loci_per_individual,
                population_size=args.population_size,
                rng=rng,
                )

        imap_filepath = "{}.input.imap.txt".format(job_title)
        with open(imap_filepath, "w") as f:
            for taxon in gene_trees.taxon_namespace:
                f.write("{}    {}\n".format(taxon.label.split("^")[1], taxon.population_label))
            f.write("\n//end of file")

        d0 = sg.generate(gene_trees)
        chars_filepath = "{}.input.chars.txt".format(job_title)
        with open(chars_filepath, "w") as f:
            for cm in d0.char_matrices:
                # Write this locus only; writing d0 here would dump the whole
                # dataset once per character matrix.
                cm.write(file=f, schema="phylip")
                f.write("\n")

        out_filepath = "{}.results.out.txt".format(job_title)
        mcmc_filepath = "{}.results.mcmc.txt".format(job_title)

        # Species labels and sample counts are taken from the leaves of the
        # containing tree returned by generate_contained_trees.
        final_containing_tree_species_labels = []
        final_containing_tree_num_individuals_per_species = []
        for nd in containing_tree.leaf_node_iter():
            final_containing_tree_species_labels.append(nd.taxon.label)
            final_containing_tree_num_individuals_per_species.append(nd.num_individuals_sampled)
        final_containing_tree_num_species = len(final_containing_tree_species_labels)
        final_containing_tree_species_labels = " ".join(final_containing_tree_species_labels)
        final_containing_tree_num_individuals_per_species = " ".join([str(i) for i in final_containing_tree_num_individuals_per_species])

        theta_prior_mean = args.population_size * 4 * args.mutation_rate_per_site
        theta_prior_a = 2.0
        theta_prior_b = theta_prior_a/theta_prior_mean
        tau_prior_mean = containing_tree.seed_node.age
        tau_prior_a = 2.0
        tau_prior_b = tau_prior_a/tau_prior_mean

        manifest_entry["num_input_lineages"] = final_containing_tree_num_species
        manifest_entry["theta"] = theta_prior_mean
        manifest_entry["theta_prior_a"] = theta_prior_a
        manifest_entry["theta_prior_b"] = theta_prior_b
        manifest_entry["root_age"] = containing_tree.seed_node.age
        manifest_entry["tau_prior_a"] = tau_prior_a
        manifest_entry["tau_prior_b"] = tau_prior_b

        species_tree = containing_tree.as_string(
                schema="newick",
                suppress_leaf_taxon_labels=False,
                suppress_leaf_node_labels=True,
                suppress_internal_taxon_labels=True,
                suppress_internal_node_labels=True,
                suppress_rooting=True,
                suppress_edge_lengths=True,
                unquoted_underscores=True,
                preserve_spaces=True,
                store_tree_weights=False,
                suppress_annotations=True,
                suppress_item_comments=True,
                )
        bpp_config = BPP_TEMPLATE.format(
                chars_filepath=chars_filepath,
                imap_filepath=imap_filepath,
                out_filepath=out_filepath,
                mcmc_filepath=mcmc_filepath,
                num_species=final_containing_tree_num_species,
                species_labels=final_containing_tree_species_labels,
                num_individuals_per_species=final_containing_tree_num_individuals_per_species,
                species_tree=species_tree,
                theta_prior_a=theta_prior_a,
                theta_prior_b=theta_prior_b,
                tau_prior_a=tau_prior_a,
                tau_prior_b=tau_prior_b,
                num_loci=args.num_loci_per_individual,
                )
        bpp_ctl_filepath = "{}.input.bpp.ctl".format(job_title)
        with open(bpp_ctl_filepath, "w") as f:
            f.write(bpp_config)
            f.write("\n")

        with open("{}.job.sge".format(job_title), "w") as jobf:
            jobf.write("#! /bin/bash\n")
            jobf.write("#$ -cwd\n")
            jobf.write("#$ -V\n")
            jobf.write("#$ -S /bin/bash\n")
            jobf.write("#$ -l h_vmem=12G\n")
            jobf.write("#$ -l virtual_free=12G\n")
            jobf.write("bpp {}\n".format(bpp_ctl_filepath))

        manifest_entry["source_tree_path"] = filepath
        manifest_entry["results_filepath"] = out_filepath
        manifest_entry["mcmc_filepath"] = mcmc_filepath
        manifest_entries.append(manifest_entry)

Example #8

Show file

File: spdw-gen-seqs.py Project: jeetsukumaran/spdw

def main():
    """
    Main CLI handler.

    Reads gene trees from the given tree files ('-' means standard input),
    generates sequences for them with Seq-Gen once per replicate, and writes
    each replicate to ``<output_prefix>.<NNN>.chars`` plus a format-specific
    extension (``.nex``, ``.phylip`` or, for bpp, ``.txt``). For bpp output
    every taxon label is prefixed with '^' and the character matrices are
    written one after another in PHYLIP format.
    """

    parser = argparse.ArgumentParser(description=__description__)
    parser.add_argument("--version",
                        action="version",
                        version="%(prog)s " + __version__)
    parser.add_argument("output_prefix")
    # NOTE(review): the help text advertises reading from standard input by
    # default, but an empty --tree-files list makes the program exit below.
    parser.add_argument(
        "-t",
        "--tree-files",
        action="append",
        type=str,
        metavar="TREEFILE",
        help="Path to tree files (default: read from standard input).")
    parser.add_argument("-f",
                        "--input-format",
                        type=str,
                        default="newick",
                        choices=["nexus", "newick"],
                        help="Input data format (default='%(default)s')")
    parser.add_argument(
        "-n",
        "--num-characters-per-locus",
        type=int,
        default=1000,
        help="Number of characters sampled per locus (default: %(default)s).")
    parser.add_argument("--mutation-rate-per-site",
                        type=float,
                        default=0.00001,
                        help="Per-site mutation rate (default: %(default)s).")
    # NOTE(review): --scale-branch-lengths and --random-seed are parsed but
    # not read anywhere in this function.
    parser.add_argument(
        "-s",
        "--scale-branch-lengths",
        action="store",
        type=float,
        default=1.0,
        help="Scale branch lengths by this factor [default=%(default)s].")
    parser.add_argument("--num-replicates",
                        type=int,
                        default=1,
                        help="Number of replicates (default: %(default)s).")
    parser.add_argument("-F",
                        "--output-format",
                        type=str,
                        default="bpp",
                        choices=["bpp", "nexus", "phylip"],
                        help="Output data format (default='%(default)s')")
    parser.add_argument(
        "--concatenate",
        action="store_true",
        default=False,
        help="Concatenate the alignments across all genealogies")
    parser.add_argument("-z",
                        "--random-seed",
                        type=int,
                        default=None,
                        help="Seed for random number generator engine.")

    args = parser.parse_args()
    if not args.tree_files:
        sys.exit("Please specify path(s) to genealogy tree file(s)")
    sg = seqgen.SeqGen()
    sg.seq_len = args.num_characters_per_locus
    sg.scale_branch_lens = args.mutation_rate_per_site
    gene_trees = dendropy.TreeList()
    for src_path in args.tree_files:
        if src_path == "-":
            src = sys.stdin
        else:
            src = open(src_path)
        with src:
            gene_trees.read(file=src,
                            schema=args.input_format,
                            rooting="force-rooted")
    if args.output_format == "bpp":
        for t in gene_trees.taxon_namespace:
            t.label = "^{}".format(t.label)
    for rep_idx in range(args.num_replicates):
        d0 = sg.generate(gene_trees)
        chars_filepath = "{}.{:03d}.chars".format(args.output_prefix,
                                                  rep_idx + 1)
        if args.output_format == "nexus":
            chars_filepath += ".nex"
            d0.write(path=chars_filepath, schema="nexus")
        elif args.output_format == "phylip":
            chars_filepath += ".phylip"
            d0.write(path=chars_filepath, schema="phylip")
        elif args.output_format == "bpp":
            chars_filepath += ".txt"
            # Context manager so every replicate's file is flushed and closed.
            with open(chars_filepath, "w") as f:
                for cm in d0.char_matrices:
                    cm.write(file=f, schema="phylip")
                    f.write("\n")
        else:
            raise NotImplementedError