Esempio n. 1
0
def _get_stock_psm():
    """
    Build the stock ProtractedSpeciationProcess used as a baseline model.

    All five rates (speciation initiation from orthospecies and from
    incipient species, speciation completion, and both extinction rates)
    are set to the same value, 0.01. The lineage and species label
    templates are identical.
    """
    stock_rate = 0.01
    # for compatibility with PBD generated data
    pbd_label_template = "S{species_id}-{species_id}-{lineage_id}"
    return protractedspeciation.ProtractedSpeciationProcess(
        speciation_initiation_from_orthospecies_rate=stock_rate,
        speciation_initiation_from_incipient_species_rate=stock_rate,
        speciation_completion_rate=stock_rate,
        orthospecies_extinction_rate=stock_rate,
        incipient_species_extinction_rate=stock_rate,
        lineage_label_format_template=pbd_label_template,
        species_label_format_template=pbd_label_template,
    )
Esempio n. 2
0
 def iter_psm_models(self, **kwargs):
     """
     Yield one ProtractedSpeciationProcess per combination of test rates.

     The splitting rate drives both speciation-initiation rates; the
     extinction rate (shared by orthospecies and incipient species) is the
     splitting rate scaled by each extinction factor; the speciation
     completion rate is half the splitting rate. Any ``kwargs`` are
     forwarded unchanged to the ProtractedSpeciationProcess constructor.
     """
     splitting_rates = (0.1, )
     extinction_rate_factors = (0.5, 0.0)
     for birth_rate in splitting_rates:
         completion_rates = (birth_rate * 0.5, )
         for factor in extinction_rate_factors:
             death_rate = birth_rate * factor
             for completion_rate in completion_rates:
                 yield protractedspeciation.ProtractedSpeciationProcess(
                     speciation_initiation_from_orthospecies_rate=birth_rate,
                     orthospecies_extinction_rate=death_rate,
                     speciation_initiation_from_incipient_species_rate=birth_rate,
                     speciation_completion_rate=completion_rate,
                     incipient_species_extinction_rate=death_rate,
                     **kwargs)
Esempio n. 3
0
def call_sample_tree(generate_sample_parameters, generated_protracted_speciation_process_parameters):
    """
    Build a ProtractedSpeciationProcess and retry until it yields a sample.

    A fresh process is constructed for every attempt. A failed call to
    ``generate_sample`` is retried indefinitely; an error raised while
    constructing the process propagates to the caller, because retrying
    with the same constructor arguments cannot succeed.

    :param generate_sample_parameters: mapping of keyword arguments passed
        to ``generate_sample``.
    :param generated_protracted_speciation_process_parameters: mapping of
        keyword arguments passed to the ``ProtractedSpeciationProcess``
        constructor.
    :return: the value returned by ``generate_sample``. Per the original
        notes on this function: index 0 is the lineage tree (|Tree| instance),
        a tree from the protracted speciation process with all lineages
        (good species as well as incipient species); index 1 is the
        orthospecies tree (|Tree| instance), a tree from the protracted
        speciation process with only "good" species.
    """
    while True:
        # Constructed outside the ``try`` so that invalid constructor
        # arguments surface immediately instead of spinning forever.
        process = protractedspeciation.ProtractedSpeciationProcess(
            **generated_protracted_speciation_process_parameters)
        try:
            return process.generate_sample(**generate_sample_parameters)
        except Exception:
            # Catch ``Exception`` rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit can still stop the loop.
            # NOTE(review): a deterministic error inside generate_sample
            # (e.g. an unknown keyword) still retries forever; narrow this to
            # the library's process-failure exception once it is confirmed.
            continue
Esempio n. 4
0
 def iter_test_references(self):
     """
     Yield one prepared record per entry of ``self.test_reference``.

     Each record is a dict holding the raw reference entry ("data"), a
     ProtractedSpeciationProcess configured from the entry's rates ("psm"),
     the four reference trees parsed from their Newick strings as
     force-rooted trees, and the lineage collection built from the entry's
     lineage table ("lineage_collection").
     """
     newick_tree_keys = (
         "lineage_tree",
         "lineage_tree_incl_extinct",
         "species_tree_oldest_samples",
         "species_tree_youngest_samples",
     )
     for reference in self.test_reference:
         origination_rate = reference["lineage_origination_rate"]
         completion_rate = reference["speciation_completion_rate"]
         extinction_rate = reference["extinction_rate"]
         record = {
             "data": reference,
             "psm": protractedspeciation.ProtractedSpeciationProcess(
                 speciation_initiation_from_orthospecies_rate=origination_rate,
                 speciation_initiation_from_incipient_species_rate=origination_rate,
                 speciation_completion_rate=completion_rate,
                 orthospecies_extinction_rate=extinction_rate,
                 incipient_species_extinction_rate=extinction_rate,
             ),
         }
         # The record key and the reference key are the same for every tree.
         for tree_key in newick_tree_keys:
             record[tree_key] = dendropy.Tree.get(
                 data=reference[tree_key],
                 schema="newick",
                 rooting="force-rooted",
             )
         lineage_table = reference["lineage_table"]
         record["lineage_collection"] = self.pbd_table_to_lineage_collection(
             lineage_table)
         yield record
Esempio n. 5
0
def main():
    """
    Simulate protracted-speciation replicates from command-line settings.

    Parses the model rates, termination conditions and run options, builds
    one ProtractedSpeciationProcess seeded from ``--random-seed`` (or from a
    randomly drawn seed, which is logged), and for each of ``--nreps``
    replicates generates a lineage tree and an orthospecies tree. A
    replicate is regenerated until its lineage tree has at least 3 leaves.
    Both trees are annotated with the run parameters and written as NEXUS
    files named ``<title>_<rep>.lineages.tre`` and ``<title>_<rep>.species.tre``.

    Exits via ``sys.exit`` when none of ``--max-time``,
    ``--max-extant-orthospecies`` or ``--max-extant-lineages`` is given.
    """
    parser = argparse.ArgumentParser()
    parameter_options = parser.add_argument_group("Model Parameters")
    parameter_options.add_argument("--b1", "--speciation_initiation_from_orthospecies_rate",
            type=float,
            dest="speciation_initiation_from_orthospecies_rate",
            default=0.5,
            help="Rate at which orthospecies give rise to new incipient species [default: %(default)s].")
    parameter_options.add_argument("--b2", "--speciation_initiation_from_incipient_species_rate",
            type=float,
            dest="speciation_initiation_from_incipient_species_rate",
            default=0.5,
            help="Rate at which incipient species give rise to new incipient species [default: %(default)s].")
    parameter_options.add_argument("--c1", "--speciation-completion-rate",
            type=float,
            dest="speciation_completion_rate",
            default=0.1,
            help="Rate at which incipient species become orthospecies [default: %(default)s].")
    parameter_options.add_argument("--e1", "--orthospecies-extinction-rate",
            type=float,
            dest="orthospecies_extinction_rate",
            default=0.1,
            help="Rate at which orthospecies go extinct [default: %(default)s].")
    parameter_options.add_argument("--e2", "--incipient-species-extinction-rate",
            type=float,
            dest="incipient_species_extinction_rate",
            default=0.1,
            help="Rate at which incipient species go extinct [default: %(default)s].")
    termination_options = parser.add_argument_group("Simulation Termination Conditions")
    termination_options.add_argument("--max-time",
            type=float,
            default=None,
            help="Maximum length of time to to run (default: %(default)s).")
    termination_options.add_argument("--max-extant-orthospecies",
            type=int,
            default=None,
            help="Maximum number of orthospecies to generate (default: %(default)s).")
    termination_options.add_argument("--max-extant-lineages",
            type=int,
            default=None,
            help="Maximum number of lineages to generate (default: %(default)s).")
    # data_options = parser.add_argument_group("Data Options")
    # data_options.add_argument("--population-size",
    #         type=int,
    #         default=10000,
    #         help="Population size (default: %(default)s).")
    # data_options.add_argument("--num-individuals-per-population",
    #         type=int,
    #         default=4,
    #         help="Number of individuals sampled per incipient species lineage (default: %(default)s).")
    # data_options.add_argument("--num-loci-per-individual",
    #         type=int,
    #         default=10,
    #         help="Number of loci sampled per individual (default: %(default)s).")
    # data_options.add_argument("--num-characters-per-locus",
    #         type=int,
    #         default=10,
    #         help="Number of characters sampled per locus (default: %(default)s).")
    # data_options.add_argument("--mutation-rate-per-site",
    #         type=float,
    #         default=0.00001,
    #         help="Per-site mutation rate (default: %(default)s).")
    run_options = parser.add_argument_group("Run Options")
    run_options.add_argument("-t", "--title",
            default="psmrun",
            help="Run title (default: '%(default)s')")
    run_options.add_argument("-n", "--nreps",
            type=int,
            default=10,
            help="Number of replicates (default: %(default)s).")
    run_options.add_argument("-z", "--random-seed",
            type=int,
            default=None,
            help="Seed for random number generator engine.")
    args = parser.parse_args()

    # At least one stopping rule is required, otherwise the simulation has
    # no termination condition to pass to generate_sample.
    if not args.max_time and not args.max_extant_orthospecies and not args.max_extant_lineages:
        sys.exit("Need to specify termination condition, at least one of: '--max-time', '--max-extant-orthospecies', '--max-extant-lineages'")
    if args.random_seed is None:
        # sys.maxsize exists on both Python 2 and Python 3; sys.maxint was
        # removed in Python 3 and raised AttributeError on this path.
        random_seed = random.randint(0, sys.maxsize - 1)
    else:
        random_seed = args.random_seed

    # Log the seed so that a run with a drawn seed can be reproduced.
    s00.log("Random seed: {}".format(random_seed))
    rng = random.Random(random_seed)
    psm = protractedspeciation.ProtractedSpeciationProcess(
            speciation_initiation_from_orthospecies_rate=args.speciation_initiation_from_orthospecies_rate,
            speciation_initiation_from_incipient_species_rate=args.speciation_initiation_from_incipient_species_rate,
            speciation_completion_rate=args.speciation_completion_rate,
            orthospecies_extinction_rate=args.orthospecies_extinction_rate,
            incipient_species_extinction_rate=args.incipient_species_extinction_rate,
            rng=rng,)
    # sg = seqgen.SeqGen()
    # sg.scale_branch_lengths = args.mutation_rate_per_site
    for rep in range(args.nreps):
        job_title = "{}_{:05d}".format(args.title, rep+1)
        s00.log("Replicate {} of {}: {}".format(rep+1, args.nreps, job_title))
        # Regenerate this replicate until the lineage tree is large enough.
        while True:
            lineage_tree, orthospecies_tree = psm.generate_sample(
                    max_time=args.max_time,
                    max_extant_orthospecies=args.max_extant_orthospecies,
                    max_extant_lineages=args.max_extant_lineages,
                    is_initial_lineage_orthospecies=True,
                    # is_correlate_lineage_and_species_trees=True,
                    )
            # Count leaves without materializing an intermediate list.
            num_extant_lineages = sum(1 for _ in lineage_tree.leaf_node_iter())
            num_extant_orthospecies = sum(1 for _ in orthospecies_tree.leaf_node_iter())
            if num_extant_lineages < 3:
                s00.log("Too few lineages ({}): Repeating replicate {} of {}: {}".format(num_extant_lineages, rep+1, args.nreps, job_title))
            # elif num_extant_orthospecies < 2:
            #     s00.log("Too few orthospecies ({}): Repeating replicate {} of {}: {}".format(num_extant_orthospecies, rep+1, args.nreps, job_title))
            else:
                break
        set_lineage_tree_taxa_from_labels(lineage_tree)
        set_orthospecies_tree_taxa_from_labels(orthospecies_tree)
        s00.log("Lineage tree size: {}, Orthospecies tree size: {}".format(num_extant_lineages, num_extant_orthospecies))
        # Record the generating parameters and the realized sizes on both trees.
        for tree in (lineage_tree, orthospecies_tree):
            tree.annotations["speciation_initiation_from_orthospecies_rate"] = args.speciation_initiation_from_orthospecies_rate
            tree.annotations["speciation_initiation_from_incipient_species_rate"] = args.speciation_initiation_from_incipient_species_rate
            tree.annotations["speciation_completion_rate"] = args.speciation_completion_rate
            tree.annotations["orthospecies_extinction_rate"] = args.orthospecies_extinction_rate
            tree.annotations["incipient_species_extinction_rate"] = args.incipient_species_extinction_rate
            tree.annotations["max_simulation_time"] = args.max_time
            tree.annotations["max_extant_orthospecies"] = args.max_extant_orthospecies
            tree.annotations["num_extant_lineages"] = num_extant_lineages
            tree.annotations["num_extant_orthospecies"] = num_extant_orthospecies
        lineage_tree.annotations["tree_type"] = "lineage"
        orthospecies_tree.annotations["tree_type"] = "orthospecies"
        lineage_tree.write(
                path="{}.lineages.tre".format(job_title),
                schema="nexus",
                suppress_annotations=False)
        orthospecies_tree.write(
                path="{}.species.tre".format(job_title),
                schema="nexus",
                suppress_annotations=False)
Esempio n. 6
0
 def test_lineage_species_tip_correlation(self):
     """
     Check that lineage-tree tips and species-tree tips reference each other.

     For every seed, model and sample, this verifies that:
     the species prefix of each lineage taxon label (the text before the
     first ".") matches the set of species-tree taxon labels; each lineage
     leaf appears in the lineage-node collection of the species node it
     points to; and each species leaf's lineage-node collection equals the
     set of lineage leaves that point back to it.
     """
     # Initial standalone sample; its trees are not inspected further.
     initial_psm = protractedspeciation.ProtractedSpeciationProcess(
         speciation_initiation_from_orthospecies_rate=0.1,
         speciation_initiation_from_incipient_species_rate=0.1,
         speciation_completion_rate=0.05,
         orthospecies_extinction_rate=0.0,
         incipient_species_extinction_rate=0.00,
     )
     _initial_lineage_tree, _initial_orthospecies_tree = initial_psm.generate_sample(
         num_extant_orthospecies=5)
     # for seed in itertools.chain((559, 631, 230, 212, 907, 237,), (random.randint(0, 1000) for i in range(10))):
     fixed_seeds = (559, 631, 230, 212, 907, 237)
     # Kept as a lazy generator so each random seed is drawn only when needed.
     random_seeds = (random.randint(0, 1000) for i in range(20))
     for seed in itertools.chain(fixed_seeds, random_seeds):
         rng = random.Random(seed)
         for psm in self.iter_psm_models(rng=rng):
             for lineage_tree, orthospecies_tree in self.iter_samples(psm):
                 self.check(lineage_tree)
                 self.check(orthospecies_tree)
                 species_labels_from_lineages = {
                     taxon.label.split(".")[0]
                     for taxon in lineage_tree.taxon_namespace
                 }
                 species_labels = {
                     taxon.label
                     for taxon in orthospecies_tree.taxon_namespace
                 }
                 self.assertEqual(species_labels_from_lineages,
                                  species_labels)
                 # print("\nLineages: {}\nSpecies: {}".format(",".join(lineage_tree_species_labels), ", ".join(species_tree_taxon_labels)))
                 # Independently rebuild species node -> lineage leaves from
                 # the lineage side, to compare with the species side below.
                 lineage_leaves_by_species = {}
                 for lineage_leaf in lineage_tree.leaf_node_iter():
                     owning_species_node = getattr(
                         lineage_leaf,
                         psm.lineage_tree_to_species_tree_node_attr)
                     linked_lineage_nodes = getattr(
                         owning_species_node,
                         psm.species_tree_to_lineage_tree_node_attr)
                     self.assertIn(lineage_leaf, linked_lineage_nodes)
                     self.assertEqual(
                         lineage_leaf.taxon.label.split(".")[0],
                         owning_species_node.taxon.label)
                     lineage_leaves_by_species.setdefault(
                         owning_species_node, set()).add(lineage_leaf)
                 for species_leaf in orthospecies_tree.leaf_node_iter():
                     linked_lineage_nodes = getattr(
                         species_leaf,
                         psm.species_tree_to_lineage_tree_node_attr)
                     self.assertEqual(
                         lineage_leaves_by_species[species_leaf],
                         linked_lineage_nodes)
Esempio n. 7
0
def main():
    """
    Main CLI handler.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("-t",
                        "--title",
                        action="store",
                        default="delineate-test",
                        help="Title of run [default=%(default)s].")
    parser.add_argument(
        "--splitting-rate",
        action="store",
        type=float,
        default=0.10,
        help=
        "Rate of origin of new lineages (population isolation rate) [default=%(default)s]."
    )
    parser.add_argument(
        "--speciation-completion-rate",
        action="store",
        type=float,
        default=0.01,
        help=
        "Rate at which lineage develop into independent species [default=%(default)s]."
    )
    parser.add_argument(
        "-n",
        "--num-replicates",
        type=int,
        default=30,
        help=
        "Number of replicates per combination of parameters (default=%(default)s)."
    )
    parser.add_argument("-z",
                        "--random-seed",
                        default=None,
                        help="Random seed.")
    parser.add_argument(
        "-u",
        "--underflow-protection",
        action="store_true",
        default=False,
        help=
        "Try to protect against underflow by using special number handling classes (slow).",
    )
    parser.add_argument(
        "--clean",
        action="store_true",
        default=False,
        help=
        "Clean up run *data* files after post-run analysis (ONLY summaries, job, and standard output will be kept)."
    )
    parser.add_argument(
        "--very-clean",
        action="store_true",
        default=False,
        help=
        "Clean up run job and *data* files after post-run analysis (ONLY summaries and standard output will be kept)."
    )
    parser.add_argument("--write-extra-for-demo",
                        action="store_true",
                        default=False,
                        help="Write extra files for demonstration purposes.")

    regime_group = parser.add_argument_group("Regime")
    regime_group.add_argument(
        "--max-time",
        default=None,
        type=float,
        help="Source trees generated with this crown age.")
    regime_group.add_argument(
        "--num-extant-lineages",
        default=None,
        type=int,
        help=
        "Source trees generated with exactly this number of tip lineages (incipient species + orthospecies)."
    )
    regime_group.add_argument(
        "--min-extant-lineages",
        default=None,
        type=int,
        help=
        "Source trees generated with at least this number of tip lineages (incipient species + orthospecies)."
    )
    regime_group.add_argument(
        "--num-extant-orthospecies",
        default=None,
        type=int,
        help=
        "Source trees generated with this number of orthospecies ('good' or true species)."
    )
    regime_group.add_argument(
        "--min-extant-orthospecies",
        default=2,
        type=int,
        help=
        "Reject source trees with less than this number of orthospecies ('good' or true species)."
    )
    partition_estimation_test_group = parser.add_argument_group(
        "Partition Estimation Test Settings")
    partition_estimation_test_group.add_argument(
        "--constrain-partitions",
        dest="constrain_partitions",
        choices=["random", "topological"],
        default="random",
        help="""
            Constrain partition sets by specifying the (true) species assignments of some lineages.
            Options are:
                'random': randomly select lineages from leaf set;
                'topological': select random internal node in tree to suppress (species assignments
                of all leaves not descending from this node will be known).
            """)
    partition_estimation_test_group.add_argument(
        "--num-unconstrained-leaves",
        default=None,
        type=int,
        help=
        "Exact number of leaves with unknown species assignments (overrides min/max below)."
    )
    partition_estimation_test_group.add_argument(
        "--min-unconstrained-leaves",
        default=None,
        type=int,
        help="Minimum number of leaves with unknown species assignments.")
    partition_estimation_test_group.add_argument(
        "--max-unconstrained-leaves",
        default=None,
        type=int,
        help="Maximum number of leaves with unknown species assignments.")
    partition_estimation_test_group.add_argument(
        "--specify-true-speciation-completion-rate",
        action="store_true",
        default=False,
        help=
        "True speciation completion rate will be provided to partition probability calculator."
    )
    args = parser.parse_args()
    command_kwargs = {}

    selected_condition = None
    for kw in ("max_time", "num_extant_lineages", "num_extant_orthospecies"):
        if getattr(args, kw) is not None:
            if selected_condition:
                sys.exit(
                    "Need to specify only one of: '--max-time', '--num-extant-lineages', '--num-extant-orthospecies'"
                )
            selected_condition = kw
            command_kwargs[kw] = getattr(args, kw)
    if selected_condition is None:
        sys.exit(
            "Need to specify at least one of: '--max-time', '--num-extant-lineages', '--num-extant-orthospecies'"
        )
    if args.random_seed is None:
        args.random_seed = random.randrange(sys.maxsize)
    rng = random.Random(args.random_seed)
    batch_id = "::".join([
        socket.gethostname(),
        START_DATETIME.strftime("%Y%m%d%H%M%S"),
        str(args.random_seed),
    ])
    log_message = [
        batch_id,
        START_DATETIME.strftime("%Y-%m-%d"),
        START_DATETIME.strftime("%H:%M:%S"),
        "'" + os.path.abspath(os.getcwd()) + "'",
        "'" + " ".join(sys.argv) + "'",
        str(args.random_seed),
    ]
    with open(LOGPATH, "a") as dest:
        dest.write("\t".join(log_message) + "\n")
    color_map = ColorMap()

    true_speciation_completion_rate = args.speciation_completion_rate
    splitting_rate = args.splitting_rate
    extinction_rate = 0.0
    data = collections.OrderedDict()
    data["params"] = collections.OrderedDict()
    data["params"]["good_species_speciation_initiation_rate"] = splitting_rate
    data["params"][
        "true_speciation_completion_rate"] = true_speciation_completion_rate
    data["params"][
        "incipient_species_speciation_initiation_rate"] = splitting_rate
    data["params"]["good_species_extinction_rate"] = extinction_rate
    data["params"]["incipient_species_extinction_rate"] = extinction_rate
    data["condition"] = selected_condition
    data["condition_value"] = command_kwargs[selected_condition]
    data["trees"] = []
    psm = protractedspeciation.ProtractedSpeciationProcess(
        speciation_initiation_from_orthospecies_rate=splitting_rate,
        speciation_initiation_from_incipient_species_rate=splitting_rate,
        speciation_completion_rate=true_speciation_completion_rate,
        orthospecies_extinction_rate=extinction_rate,
        incipient_species_extinction_rate=extinction_rate,
        rng=rng)
    output_prefix = "{}_spr{:0.3f}_".format(args.title,
                                            true_speciation_completion_rate)
    tree_idx = 0
    for tree_idx in range(args.num_replicates):
        while True:
            # make sure that the tree we generate has enough species
            lineage_tree, orthospecies_tree = psm.generate_sample(
                **command_kwargs)
            if len(orthospecies_tree.taxon_namespace
                   ) >= args.min_extant_orthospecies:
                ok = []
                if args.min_unconstrained_leaves:
                    if len(lineage_tree.taxon_namespace
                           ) >= args.min_unconstrained_leaves:
                        ok.append(True)
                    else:
                        ok.append(False)
                if args.min_extant_lineages:
                    if len(lineage_tree.taxon_namespace
                           ) >= args.min_extant_lineages:
                        ok.append(True)
                    else:
                        ok.append(False)
                if all(ok):
                    break
        sorted_species_lineages_map = collections.OrderedDict()
        lineage_label_species_label_map = {}
        for k in sorted([t.label for t in orthospecies_tree.taxon_namespace]):
            sorted_species_lineages_map[k] = []
        for ond in orthospecies_tree.leaf_node_iter():
            sorted_species_lineages_map[ond.taxon.label] = sorted(
                [lnd.taxon.label for lnd in ond.lineage_tree_nodes])
            for lnd in ond.lineage_tree_nodes:
                lineage_label_species_label_map[
                    lnd.taxon.label] = ond.taxon.label
        true_species_leafsets = sorted(sorted_species_lineages_map.values())
        entry = collections.OrderedDict()
        entry["tree_filepath"] = "{}.{:04d}.nex".format(
            output_prefix, tree_idx + 1)
        entry["run_config_filepath"] = "{}.{:04d}.json".format(
            output_prefix, tree_idx + 1)
        entry["lineage_taxon_namespace"] = [
            t.label for t in lineage_tree.taxon_namespace
        ]
        entry["lineage_tree"] = lineage_tree.as_string("newick").replace(
            "\n", "")
        entry["species_taxon_namespace"] = sorted(
            sorted_species_lineages_map.keys())
        entry["species_tree"] = orthospecies_tree.as_string("newick").replace(
            "\n", "")
        entry["species_lineages_map"] = sorted_species_lineages_map
        data["trees"].append(entry)
        lineage_tree.write(path=entry["tree_filepath"], schema="nexus")

        config = collections.OrderedDict()
        # for speciation rate estimation; ignored by species partition estimation
        # for species partition estimation
        species_leafset_constraints = None
        if args.constrain_partitions is not None:
            if args.constrain_partitions == "topological":
                lineage_tree_internal_nodes = [
                    lnd for lnd in lineage_tree.postorder_internal_node_iter()
                    if lnd is not lineage_tree.seed_node
                ]
                rng.shuffle(lineage_tree_internal_nodes)
                true_unconstrained_lineage_leaf_labels = None
                for lineage_tree_internal_node in lineage_tree_internal_nodes:
                    true_unconstrained_lineage_leaf_labels = set([
                        lineage_tree_leaf_node.taxon.label
                        for lineage_tree_leaf_node in
                        lineage_tree_internal_node.leaf_iter()
                    ])
                    if not args.num_unconstrained_leaves and not args.min_unconstrained_leaves and not args.max_unconstrained_leaves:
                        break
                    elif args.num_unconstrained_leaves:
                        if len(true_unconstrained_lineage_leaf_labels
                               ) == args.num_unconstrained_leaves:
                            break
                    elif args.min_unconstrained_leaves and args.max_unconstrained_leaves:
                        if len(true_unconstrained_lineage_leaf_labels
                               ) >= args.min_unconstrained_leaves and len(
                                   true_unconstrained_lineage_leaf_labels
                               ) <= args.max_unconstrained_leaves:
                            break
                    elif args.min_unconstrained_leaves and len(
                            true_unconstrained_lineage_leaf_labels
                    ) >= args.min_unconstrained_leaves:
                        break
                    elif args.max_unconstrained_leaves and len(
                            true_unconstrained_lineage_leaf_labels
                    ) <= args.max_unconstrained_leaves:
                        break
                    true_unconstrained_lineage_leaf_labels = None
                else:
                    raise ValueError(
                        "Unable to meet min/max unconstrained leaves criteria."
                    )
            elif args.constrain_partitions == "random":
                lineage_leaf_labels = [
                    taxon.label for taxon in lineage_tree.taxon_namespace
                ]
                if args.num_unconstrained_leaves:
                    num_to_sample = args.num_unconstrained_leaves
                else:
                    if args.min_unconstrained_leaves:
                        min_count = args.min_unconstrained_leaves
                    else:
                        min_count = 1
                    if args.max_unconstrained_leaves:
                        max_count = args.max_unconstrained_leaves
                    else:
                        max_count = len(lineage_leaf_labels)
                    num_to_sample = rng.randint(min_count, max_count)
                true_unconstrained_lineage_leaf_labels = rng.sample(
                    lineage_leaf_labels, num_to_sample)
            true_constrained_lineage_leaf_labels = sorted([
                lineage_tree_leaf_node.taxon.label
                for lineage_tree_leaf_node in lineage_tree.leaf_node_iter()
                if lineage_tree_leaf_node.taxon.label not in
                true_unconstrained_lineage_leaf_labels
            ])
            species_leafset_constraint_label_map = collections.OrderedDict()
            for lineage_leaf_label in true_constrained_lineage_leaf_labels:
                true_species_label = lineage_label_species_label_map[
                    lineage_leaf_label]
                try:
                    species_leafset_constraint_label_map[
                        true_species_label].append(lineage_leaf_label)
                except KeyError:
                    species_leafset_constraint_label_map[
                        true_species_label] = [lineage_leaf_label]
            for species_label in species_leafset_constraint_label_map:
                species_leafset_constraint_label_map[species_label].sort()
            species_leafset_constraints = []
            for sp in sorted_species_lineages_map:
                if sp in species_leafset_constraint_label_map:
                    species_leafset_constraints.append(
                        sorted(species_leafset_constraint_label_map[sp]))
        else:
            assert args.constrain_partitions is None
            assert args.num_unconstrained_leaves is None
            assert args.min_unconstrained_leaves is None
            assert args.max_unconstrained_leaves is None
            true_constrained_lineage_leaf_labels = []
            true_unconstrained_lineage_leaf_labels = [
                lineage_tree_leaf_node.taxon.label
                for lineage_tree_leaf_node in lineage_tree.leaf_node_iter()
            ]
            species_leafset_constraint_label_map = {}
            species_leafset_constraints = None
        if species_leafset_constraints is not None:
            # this is actually used by the DELINEATE program
            assert args.constrain_partitions is not None
            config["species_leafset_constraints"] = species_leafset_constraints
        else:
            assert args.constrain_partitions is None
            try:
                del config["species_leafset_constraints"]
            except KeyError:
                pass

        # extra files for demo
        if args.write_extra_for_demo:
            constrained_color, unconstrained_color = ColorMap.contrast_pairs[0]
            demo_output_prefix = "{}.{:04d}.demo".format(
                output_prefix, tree_idx + 1)
            true_unconstrained_lineage_leaf_label_set = set(
                true_unconstrained_lineage_leaf_labels)
            for nd in lineage_tree:
                if nd.is_leaf():
                    is_constrained = nd.taxon.label in true_unconstrained_lineage_leaf_label_set
                    nd.annotations["constrained"] = is_constrained
                    species_label = lineage_label_species_label_map[
                        nd.taxon.label]
                    nd.annotations["species"] = species_label
                    if False:  #args.color_by_species:
                        nd.annotations["!color"] = color_map(species_label)
                    else:
                        if is_constrained:
                            nd.annotations["!color"] = constrained_color
                        else:
                            nd.annotations["!color"] = unconstrained_color
                else:
                    nd.annotations["!color"] = "#aaaaaa"
            lineage_tree_figtree_block = [
                'set appearance.branchLineWidth=5.0;',
                'set scaleBar.isShown=false;',
            ]
            # lineage_tree_figtree_block.extend([
            #         'set nodeShapeExternal.colourAttribute="constrained";',
            #         'set nodeShapeExternal.isShown=true;',
            #         'set nodeShapeExternal.minSize=10.0;',
            #         'set nodeShapeExternal.scaleType=Width;',
            #         'set nodeShapeExternal.shapeType=Circle;',
            #         'set nodeShapeExternal.size=10.0;',
            #         'set nodeShapeExternal.sizeAttribute="Fixed";',
            #     ])
            lineage_tree_figtree_block.extend([
                'set tipLabels.colorAttribute="species";',
                'set tipLabels.displayAttribute="species";',
                'set tipLabels.fontName="sansserif";',
                'set tipLabels.fontSize=30;',
                'set tipLabels.fontStyle=0;',
                'set tipLabels.isShown=true;',
                'set tipLabels.significantDigits=4;',
            ])
            # lineage_tree_figtree_block.extend([
            # 'set legend.attribute="constrained";',
            # 'set legend.fontSize=10.0;',
            # 'set legend.isShown=true;',
            # 'set legend.significantDigits=4;',
            #     ])
            lineage_tree_figtree_block = "begin figtree;\n{}\nend;\n".format(
                "\n".join(lineage_tree_figtree_block))
            lineage_tree.write(
                path="{}.lineages.nex".format(demo_output_prefix),
                supplemental_blocks=[lineage_tree_figtree_block],
                schema="nexus")

            orthospecies_tree_figtree_block = [
                'set appearance.branchLineWidth=5.0;',
                'set scaleBar.isShown=false;',
            ]
            # orthospecies_tree_figtree_block.extend([
            #         'set nodeShapeExternal.colourAttribute="constrained";',
            #         'set nodeShapeExternal.isShown=true;',
            #         'set nodeShapeExternal.minSize=10.0;',
            #         'set nodeShapeExternal.scaleType=Width;',
            #         'set nodeShapeExternal.shapeType=Circle;',
            #         'set nodeShapeExternal.size=10.0;',
            #         'set nodeShapeExternal.sizeAttribute="Fixed";',
            #     ])
            orthospecies_tree_figtree_block.extend([
                # 'set tipLabels.colorAttribute="Name";',
                'set tipLabels.displayAttribute="Name";',
                'set tipLabels.fontName="sansserif";',
                'set tipLabels.fontSize=30;',
                'set tipLabels.fontStyle=0;',
                'set tipLabels.isShown=true;',
                'set tipLabels.significantDigits=4;',
            ])
            # orthospecies_tree_figtree_block.extend([
            # 'set legend.attribute="constrained";',
            # 'set legend.fontSize=10.0;',
            # 'set legend.isShown=true;',
            # 'set legend.significantDigits=4;',
            #     ])
            orthospecies_tree_figtree_block = "begin figtree;\n{}\nend;\n".format(
                "\n".join(orthospecies_tree_figtree_block))
            orthospecies_tree.write(
                path="{}.species.nex".format(demo_output_prefix),
                supplemental_blocks=[orthospecies_tree_figtree_block],
                schema="nexus")

        # for post analysis assessment (not used by the inference program)
        config["test_info"] = collections.OrderedDict()
        config["test_info"]["species_leafsets"] = true_species_leafsets
        config["test_info"]["constrained_lineages"] = sorted(
            true_constrained_lineage_leaf_labels)
        config["test_info"]["unconstrained_lineages"] = sorted(
            true_unconstrained_lineage_leaf_labels)
        config["test_info"][
            "species_leafset_constraint_label_map"] = species_leafset_constraint_label_map
        config["test_info"][
            "species_partition_estimation_num_constrained_species"] = len(
                species_leafset_constraint_label_map)
        config["test_info"][
            "species_partition_estimation_num_constrained_lineages"] = len(
                true_constrained_lineage_leaf_labels)
        config["test_info"][
            "species_partition_estimation_num_unconstrained_lineages"] = len(
                true_unconstrained_lineage_leaf_labels)
        config["test_info"][
            "species_partition_estimation_num_unconstrained_lineages"] = len(
                true_unconstrained_lineage_leaf_labels)
        config["test_info"][
            "true_speciation_completion_rate"] = true_speciation_completion_rate  # not actually used?
        config["test_info"]["true_species_leafsets"] = true_species_leafsets
        config["test_info"]["true_num_species"] = len(true_species_leafsets)
        with open(entry["run_config_filepath"], "w") as dest:
            json.dump(config, dest, indent=2)
        job_prefix = entry["tree_filepath"].replace(".nex", "")
        print(job_prefix)
        job_commands = []
        to_clean = []
        common_settings = {
            "run_config_filepath": entry["run_config_filepath"],
            "tree_filepath": entry["tree_filepath"],
            "num_lineages": len(entry["lineage_taxon_namespace"]),
            "num_species": len(entry["species_taxon_namespace"]),
            "true_speciation_completion_rate": true_speciation_completion_rate,
            "batch_id": batch_id,
        }
        to_clean.append(common_settings["run_config_filepath"])
        to_clean.append(common_settings["tree_filepath"])
        if args.underflow_protection:
            underflow_protection = "--underflow-protection"
        else:
            underflow_protection = ""
        job_kwargs = dict(common_settings)
        job_kwargs["underflow_protection"] = underflow_protection
        job_kwargs[
            "delineate_results_filepath"] = job_prefix + ".partition-probs.json"
        to_clean.append(job_kwargs["delineate_results_filepath"])
        if args.specify_true_speciation_completion_rate:
            job_kwargs[
                "speciation_completion_rate"] = "--speciation-completion-rate {}".format(
                    true_speciation_completion_rate)
        else:
            job_kwargs["speciation_completion_rate"] = ""
        job_kwargs[
            "post_analysis_performance_assessment_command"] = "spdw-evaluate-delineate-jobs.py".format(
                SCRIPT_DIR)
        job_commands.append(
            species_partition_estimation_job_template.format(**job_kwargs))
        job_kwargs[
            "joint_performance_assessment_results_filepath"] = job_prefix + ".joint-partition-est-perf.tsv"
        job_commands.append(
            species_partition_estimation_joint_probability_analysis_template.
            format(**job_kwargs))
        job_filepath = job_prefix + ".job"
        if args.clean or args.very_clean:
            clean_command = ["rm", "-f"]
            clean_command.extend(to_clean)
            if args.very_clean:
                clean_command.append(job_filepath)
            job_commands.append(" ".join(clean_command))
        with open(job_filepath, "w") as dest:
            dest.write(template.format(jobs="\n".join(job_commands)))
    if not args.clean and not args.very_clean:
        with open(output_prefix + ".json", "w") as dest:
            json.dump(data, dest, indent=2)
Esempio n. 8
0
def main():
    """
    Command-line driver: simulate replicate trees under a protracted
    speciation process and write a setup log for each replicate.

    Steps performed:

    1.  Parse model-rate, termination, data-generation and run options.
    2.  Exit (via ``sys.exit``) unless at least one of ``--max-time``,
        ``--max-extant-orthospecies`` or ``--max-extant-lineages`` is given.
    3.  Pick a random seed (drawn from the global ``random`` module when
        ``--random-seed`` is not supplied), report it through ``_log`` and
        build a dedicated ``random.Random`` instance from it.
    4.  For each of the ``--nreps`` replicates: generate a (lineage tree,
        orthospecies tree) pair, label both trees, write them to
        ``x1.nexus`` / ``x2.nexus``, and write ``<run-title>_<NNN>.setup.log``
        describing the trees and all parameters used.
    """
    parser = argparse.ArgumentParser()
    parameter_options = parser.add_argument_group("Model Parameters")
    parameter_options.add_argument("--b1", "--speciation_initiation_from_orthospecies_rate",
            type=float,
            dest="speciation_initiation_from_orthospecies_rate",
            default=1.0,
            help="Rate at which orthospecies give rise to new incipient species [default: %(default)s].")
    parameter_options.add_argument("--b2", "--speciation_initiation_from_incipient_species_rate",
            type=float,
            dest="speciation_initiation_from_incipient_species_rate",
            default=1.0,
            help="Rate at which incipient species give rise to new incipient species [default: %(default)s].")
    parameter_options.add_argument("--c1", "--speciation-completion-rate",
            type=float,
            dest="speciation_completion_rate",
            default=1.0,
            help="Rate at which incipient species become orthospecies [default: %(default)s].")
    parameter_options.add_argument("--e1", "--orthospecies-extinction-rate",
            type=float,
            dest="orthospecies_extinction_rate",
            default=1.0,
            help="Rate at which orthospecies go extinct [default: %(default)s].")
    parameter_options.add_argument("--e2", "--incipient-species-extinction-rate",
            type=float,
            dest="incipient_species_extinction_rate",
            default=1.0,
            help="Rate at which incipient species go extinct [default: %(default)s].")
    termination_options = parser.add_argument_group("Simulation Termination Conditions")
    termination_options.add_argument("--max-time",
            type=float,
            default=None,
            help="Maximum length of time to to run (default: %(default)s).")
    termination_options.add_argument("--max-extant-orthospecies",
            type=int,
            default=None,
            help="Maximum number of orthospecies to generate (default: %(default)s).")
    termination_options.add_argument("--max-extant-lineages",
            type=int,
            default=None,
            help="Maximum number of lineages to generate (default: %(default)s).")
    data_options = parser.add_argument_group("Data Options")
    data_options.add_argument("--population-size",
            type=int,
            default=10000,
            help="Population size (default: %(default)s).")
    data_options.add_argument("--num-individuals-per-population",
            type=int,
            default=4,
            help="Number of individuals sampled per incipient species lineage (default: %(default)s).")
    data_options.add_argument("--num-loci-per-individual",
            type=int,
            default=10,
            help="Number of loci sampled per individual (default: %(default)s).")
    data_options.add_argument("--num-characters-per-locus",
            type=int,
            default=10,
            help="Number of characters sampled per locus (default: %(default)s).")
    data_options.add_argument("--mutation-rate-per-site",
            type=float,
            default=0.00001,
            help="Per-site mutation rate (default: %(default)s).")
    run_options = parser.add_argument_group("Run Options")
    run_options.add_argument("-t", "--run-title",
            default="run",
            help="Run title (default: '%(default)s')")
    run_options.add_argument("-n", "--nreps",
            type=int,
            default=10,
            help="Number of replicates (default: %(default)s).")
    run_options.add_argument("-z", "--random-seed",
            type=int,
            default=None,
            help="Seed for random number generator engine.")
    args = parser.parse_args()

    # At least one stopping rule is required; the check is truthiness-based,
    # so a value of 0 counts as "not specified".
    if not args.max_time and not args.max_extant_orthospecies and not args.max_extant_lineages:
        sys.exit("Need to specify termination condition, at least one of: '--max-time', '--max-extant-orthospecies', '--max-extant-lineages'")
    if args.random_seed is None:
        # ``sys.maxint`` does not exist on Python 3; ``sys.maxsize`` is
        # available on both Python 2.6+ and 3. ``randrange(n)`` draws from
        # [0, n-1], the same range the previous ``randint(0, n-1)`` used.
        random_seed = random.randrange(sys.maxsize)
    else:
        random_seed = args.random_seed

    _log("Random seed: {}".format(random_seed))
    rng = random.Random(random_seed)
    psm = protractedspeciation.ProtractedSpeciationProcess(
            speciation_initiation_from_orthospecies_rate=args.speciation_initiation_from_orthospecies_rate,
            orthospecies_extinction_rate=args.orthospecies_extinction_rate,
            speciation_initiation_from_incipient_species_rate=args.speciation_initiation_from_incipient_species_rate,
            speciation_completion_rate=args.speciation_completion_rate,
            incipient_species_extinction_rate=args.incipient_species_extinction_rate,
            rng=rng,)
    # NOTE(review): ``sg`` is configured here but not used anywhere in the
    # visible part of this function -- confirm whether sequence generation
    # was meant to follow the logging step.
    sg = seqgen.SeqGen()
    sg.scale_branch_lengths = args.mutation_rate_per_site
    for rep in range(args.nreps):
        job_title = "{}_{:03d}".format(args.run_title, rep+1)
        _log("Replicate {} of {}: {}".format(rep+1, args.nreps, job_title))
        lineage_tree, orthospecies_tree = psm.generate_sample(
                max_time=args.max_time,
                max_extant_orthospecies=args.max_extant_orthospecies,
                max_extant_lineages=args.max_extant_lineages,
                is_initial_lineage_orthospecies=True,
                # is_correlate_lineage_and_species_trees=True,
                )
        # lineage_tree.calc_node_ages()
        # orthospecies_tree.calc_node_ages()
        # _log("    Incipient species tree: {} tips, root age = {} ({} mutation units)".format(len(lineage_tree.leaf_nodes()), lineage_tree.seed_node.age, lineage_tree.seed_node.age * args.mutation_rate_per_site,))
        # _log("    Orthospecies tree:      {} tips, root age = {} ({} mutation units)".format(len(orthospecies_tree.leaf_nodes()), orthospecies_tree.seed_node.age))
        label_lineage_tree(lineage_tree)
        label_orthospecies_tree(orthospecies_tree)
        # These two paths are fixed, so every replicate overwrites the files
        # written by the previous one.
        lineage_tree.write(path="x1.nexus", schema="nexus")
        orthospecies_tree.write(path="x2.nexus", schema="nexus")

        # The context manager guarantees the log is flushed and closed even
        # if one of the writes (or describe_tree) raises.
        with open("{}.setup.log".format(job_title), "w") as logf:
            logf.write("-  Replicate {} of {} generated by command: {}\n".format(rep+1, args.nreps, " ".join(sys.argv)))
            logf.write("\n")
            logf.write("-  Random seed used: {}\n".format(random_seed))
            logf.write("\n")
            describe_tree(logf, lineage_tree, "-  Lineage Tree Profile")
            logf.write("\n")
            describe_tree(logf, orthospecies_tree, "-  Orthospecies Tree Profile")
            logf.write("\n")
            logf.write("-  Protracted Speciation Model Parameters\n")
            logf.write("   -       Speciation initiation from orthospecies rate: {}\n".format(args.speciation_initiation_from_orthospecies_rate))
            logf.write("   -  Speciation initiation from incipient species rate: {}\n".format(args.speciation_initiation_from_incipient_species_rate))
            logf.write("   -                         Speciation completion rate: {}\n".format(args.speciation_completion_rate))
            logf.write("   -                       Orthospecies extinction rate: {}\n".format(args.orthospecies_extinction_rate))
            logf.write("   -                  Incipient species extinction rate: {}\n".format(args.incipient_species_extinction_rate))
            logf.write("   -               Termination: Maximum simulation time: {}\n".format(args.max_time))
            logf.write("   -                  Termination: Maximum orthospecies: {}\n".format(args.max_extant_orthospecies))
            logf.write("   -                      Termination: Maximum lineages: {}\n".format(args.max_extant_lineages))
            logf.write("\n")
            logf.write("-  Data Generation Parameters\n")
            logf.write("   -                                    Population size: {}\n".format(args.population_size))
            logf.write("   -                 Individuals per species/population: {}\n".format(args.num_individuals_per_population))
            logf.write("   -                      Number of loci per individual: {}\n".format(args.num_loci_per_individual))
            logf.write("   -                             Per-site mutation rate: {}\n".format(args.mutation_rate_per_site))
            logf.write("\n")
def main():
    """
    Main CLI handler.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("test_type",
                        default="None",
                        action="store",
                        help="Type of test: {}.".format(test_types_desc))
    parser.add_argument("-t",
                        "--title",
                        default="run",
                        help="Name for this run.")
    parser.add_argument("-c",
                        "--cluster",
                        choices=["flux", "kuhpc", "mesxuuyan"],
                        default=None,
                        help="Scheduler type.")
    parser.add_argument(
        "-n",
        "--num-replicates",
        type=int,
        default=30,
        help=
        "Number of replicates per combination of parameters (default=%(default)s)."
    )
    parser.add_argument("-z",
                        "--random-seed",
                        default=None,
                        help="Random seed.")
    parser.add_argument(
        "-u",
        "--underflow-protection",
        action="store_true",
        default=False,
        help=
        "Try to protect against underflow by using special number handling classes (slow).",
    )
    parser.add_argument("--check-mode",
                        action="store_true",
                        dest="check_mode",
                        default=False,
                        help="Generate small data set for checking.")
    parser.add_argument("--mem",
                        default=120,
                        help="[FLUX only] Memory size (in GB).")
    parser.add_argument(
        "--clean",
        action="store_true",
        default=False,
        help=
        "Clean up run *data* files after post-run analysis (ONLY summaries, job, and standard output will be kept)."
    )
    parser.add_argument(
        "--very-clean",
        action="store_true",
        default=False,
        help=
        "Clean up run job and *data* files after post-run analysis (ONLY summaries and standard output will be kept)."
    )

    regime_group = parser.add_argument_group("Regime")
    regime_group.add_argument(
        "--max-time",
        default=None,
        type=float,
        help="Source trees generated with this crown age.")
    regime_group.add_argument(
        "--num-extant-lineages",
        default=None,
        type=int,
        help=
        "Source trees generated with exactly this number of tip lineages (incipient species + orthospecies)."
    )
    regime_group.add_argument(
        "--min-extant-lineages",
        default=None,
        type=int,
        help=
        "Source trees generated with at least this number of tip lineages (incipient species + orthospecies)."
    )
    regime_group.add_argument(
        "--num-extant-orthospecies",
        default=None,
        type=int,
        help=
        "Source trees generated with this number of orthospecies ('good' or true species)."
    )
    regime_group.add_argument(
        "--min-extant-orthospecies",
        default=2,
        type=int,
        help=
        "Reject source trees with less than this number of orthospecies ('good' or true species)."
    )
    # regime_group.add_argument("--rate-sweep",
    #         default=None,
    #         choices=["1", "2",],
    #         help="Speciation completion rate sweep: 1 = [0.01, 0.02, ..., 0.1]; 2 = [{}]".format(rate_sweep_2))
    partition_estimation_test_group = parser.add_argument_group(
        "Partition Estimation Test Settings")
    partition_estimation_test_group.add_argument(
        "--constrain-partitions",
        dest="constrain_partitions",
        choices=["random", "topological"],
        default=None,
        help="""
            Constrain partition sets by specifying the (true) species assignments of some lineages.
            Options are:
                'random': randomly select lineages from leaf set;
                'topological': select random internal node in tree to suppress (species assignments
                of all leaves not descending from this node will be known).
            """)
    partition_estimation_test_group.add_argument(
        "--num-unconstrained-leaves",
        default=None,
        type=int,
        help=
        "Exact number of leaves with unknown species assignments (overrides min/max below)."
    )
    partition_estimation_test_group.add_argument(
        "--min-unconstrained-leaves",
        default=None,
        type=int,
        help="Minimum number of leaves with unknown species assignments.")
    partition_estimation_test_group.add_argument(
        "--max-unconstrained-leaves",
        default=None,
        type=int,
        help="Maximum number of leaves with unknown species assignments.")
    partition_estimation_test_group.add_argument(
        "--specify-true-speciation-completion-rate",
        action="store_true",
        default=False,
        help=
        "True speciation completion rate will be provided to partition probability calculator."
    )
    args = parser.parse_args()
    if args.test_type is None:
        sys.exit("Must specify test type: {}".format(test_types_desc))
    elif args.test_type not in test_types:
        sys.exit("Unrecognized test type: '{}'".format(args.test_type))
    command_kwargs = {}
    if args.check_mode:
        true_sp_rates = (0.01, )
        args.num_replicates = 1
    else:
        # if args.rate_sweep == "1":
        #     true_sp_rates = [x/100.0 for x in range(1, 11)]
        # elif args.rate_sweep == "2":
        #     true_sp_rates = rate_sweep_2
        # else:
        #     raise ValueError("Must specify rate sweep")
        if args.test_type == speciation_completion_rate_test_type:
            # true_sp_rates = [0.001, 0.002, 0.004, 0.008, 0.016, 0.032, 0.064, 0.12]
            true_sp_rates = [
                0.001, 0.002, 0.004, 0.008, 0.01, 0.02, 0.04, 0.08, 0.100
            ]
        else:
            true_sp_rates = rate_sweep_2
    if args.cluster is None:
        sys.exit("Need to specify cluster: '--cluster'")
    elif args.cluster == "flux":
        preamble = flux_preamble.format(mem=args.mem)
    elif args.cluster == "kuhpc":
        preamble = kuhpc_preamble
    elif args.cluster == "mesxuuyan":
        preamble = mesxuuyan_preamble
    else:
        raise ValueError(args.cluster)
    selected_condition = None
    for kw in ("max_time", "num_extant_lineages", "num_extant_orthospecies"):
        if getattr(args, kw) is not None:
            if selected_condition:
                sys.exit(
                    "Need to specify only one of: '--max-time', '--num-extant-lineages', '--num-extant-orthospecies'"
                )
            selected_condition = kw
            command_kwargs[kw] = getattr(args, kw)
    if selected_condition is None:
        sys.exit(
            "Need to specify at least one of: '--max-time', '--num-extant-lineages', '--num-extant-orthospecies'"
        )
    if args.random_seed is None:
        args.random_seed = random.randrange(sys.maxsize)
    rng = random.Random(args.random_seed)
    batch_id = "::".join([
        socket.gethostname(),
        START_DATETIME.strftime("%Y%m%d%H%M%S"),
        str(args.random_seed),
    ])
    log_message = [
        batch_id,
        START_DATETIME.strftime("%Y-%m-%d"),
        START_DATETIME.strftime("%H:%M:%S"),
        "'" + os.path.abspath(os.getcwd()) + "'",
        "'" + " ".join(sys.argv) + "'",
        str(args.random_seed),
    ]
    with open(LOGPATH, "a") as dest:
        dest.write("\t".join(log_message) + "\n")
    for true_sp_rate in true_sp_rates:
        true_speciation_completion_rate = true_sp_rate
        splitting_rate = 0.1
        extinction_rate = 0.0
        data = collections.OrderedDict()
        data["params"] = collections.OrderedDict()
        data["params"][
            "good_species_speciation_initiation_rate"] = splitting_rate
        data["params"][
            "true_speciation_completion_rate"] = true_speciation_completion_rate
        data["params"][
            "incipient_species_speciation_initiation_rate"] = splitting_rate
        data["params"]["good_species_extinction_rate"] = extinction_rate
        data["params"]["incipient_species_extinction_rate"] = extinction_rate
        data["condition"] = selected_condition
        data["condition_value"] = command_kwargs[selected_condition]
        data["trees"] = []
        psm = protractedspeciation.ProtractedSpeciationProcess(
            speciation_initiation_from_orthospecies_rate=splitting_rate,
            speciation_initiation_from_incipient_species_rate=splitting_rate,
            speciation_completion_rate=true_speciation_completion_rate,
            orthospecies_extinction_rate=extinction_rate,
            incipient_species_extinction_rate=extinction_rate,
            rng=rng)
        output_prefix = "{}_spr{:0.3f}_".format(args.title, true_sp_rate)
        tree_idx = 0
        for tree_idx in range(args.num_replicates):
            while True:
                # make sure that the tree we generate has enough species
                lineage_tree, orthospecies_tree = psm.generate_sample(
                    **command_kwargs)
                if len(orthospecies_tree.taxon_namespace
                       ) >= args.min_extant_orthospecies:
                    if args.test_type in partition_test_types:
                        ok = []
                        if args.min_unconstrained_leaves:
                            if len(lineage_tree.taxon_namespace
                                   ) >= args.min_unconstrained_leaves:
                                ok.append(True)
                            else:
                                ok.append(False)
                        if args.min_extant_lineages:
                            if len(lineage_tree.taxon_namespace
                                   ) >= args.min_extant_lineages:
                                ok.append(True)
                            else:
                                ok.append(False)
                        if all(ok):
                            break
                    else:
                        break
            sorted_species_lineages_map = collections.OrderedDict()
            lineage_label_species_label_map = {}
            for k in sorted(
                [t.label for t in orthospecies_tree.taxon_namespace]):
                sorted_species_lineages_map[k] = []
            for ond in orthospecies_tree.leaf_node_iter():
                sorted_species_lineages_map[ond.taxon.label] = sorted(
                    [lnd.taxon.label for lnd in ond.lineage_tree_nodes])
                for lnd in ond.lineage_tree_nodes:
                    lineage_label_species_label_map[
                        lnd.taxon.label] = ond.taxon.label
            true_species_leafsets = sorted(
                sorted_species_lineages_map.values())
            entry = collections.OrderedDict()
            entry["tree_filepath"] = "{}.{:04d}.nex".format(
                output_prefix, tree_idx + 1)
            entry["run_config_filepath"] = "{}.{:04d}.json".format(
                output_prefix, tree_idx + 1)
            entry["lineage_taxon_namespace"] = [
                t.label for t in lineage_tree.taxon_namespace
            ]
            entry["lineage_tree"] = lineage_tree.as_string("newick").replace(
                "\n", "")
            entry["species_taxon_namespace"] = sorted(
                sorted_species_lineages_map.keys())
            entry["species_tree"] = orthospecies_tree.as_string(
                "newick").replace("\n", "")
            entry["species_lineages_map"] = sorted_species_lineages_map
            data["trees"].append(entry)
            lineage_tree.write(path=entry["tree_filepath"], schema="nexus")

            config = collections.OrderedDict()
            # for speciation rate estimation; ignored by species partition estimation
            # for species partition estimation
            species_leafset_constraints = None
            if args.test_type == speciation_completion_rate_test_type:
                config["species_leafset_constraints"] = true_species_leafsets
                true_constrained_lineage_leaf_labels = true_species_leafsets
                true_unconstrained_lineage_leaf_labels = []
                species_leafset_constraint_label_map = {}
            elif args.test_type in partition_test_types:
                if args.constrain_partitions is not None:
                    if args.constrain_partitions == "topological":
                        lineage_tree_internal_nodes = [
                            lnd for lnd in
                            lineage_tree.postorder_internal_node_iter()
                            if lnd is not lineage_tree.seed_node
                        ]
                        rng.shuffle(lineage_tree_internal_nodes)
                        true_unconstrained_lineage_leaf_labels = None
                        for lineage_tree_internal_node in lineage_tree_internal_nodes:
                            true_unconstrained_lineage_leaf_labels = set([
                                lineage_tree_leaf_node.taxon.label
                                for lineage_tree_leaf_node in
                                lineage_tree_internal_node.leaf_iter()
                            ])
                            if not args.num_unconstrained_leaves and not args.min_unconstrained_leaves and not args.max_unconstrained_leaves:
                                break
                            elif args.num_unconstrained_leaves:
                                if len(true_unconstrained_lineage_leaf_labels
                                       ) == args.num_unconstrained_leaves:
                                    break
                            elif args.min_unconstrained_leaves and args.max_unconstrained_leaves:
                                if len(
                                        true_unconstrained_lineage_leaf_labels
                                ) >= args.min_unconstrained_leaves and len(
                                        true_unconstrained_lineage_leaf_labels
                                ) <= args.max_unconstrained_leaves:
                                    break
                            elif args.min_unconstrained_leaves and len(
                                    true_unconstrained_lineage_leaf_labels
                            ) >= args.min_unconstrained_leaves:
                                break
                            elif args.max_unconstrained_leaves and len(
                                    true_unconstrained_lineage_leaf_labels
                            ) <= args.max_unconstrained_leaves:
                                break
                            true_unconstrained_lineage_leaf_labels = None
                        else:
                            raise ValueError(
                                "Unable to meet min/max unconstrained leaves criteria."
                            )
                    elif args.constrain_partitions == "random":
                        lineage_leaf_labels = [
                            taxon.label
                            for taxon in lineage_tree.taxon_namespace
                        ]
                        if args.num_unconstrained_leaves:
                            num_to_sample = args.num_unconstrained_leaves
                        else:
                            if args.min_unconstrained_leaves:
                                min_count = args.min_unconstrained_leaves
                            else:
                                min_count = 1
                            if args.max_unconstrained_leaves:
                                max_count = args.max_unconstrained_leaves
                            else:
                                max_count = len(lineage_leaf_labels)
                            num_to_sample = rng.randint(min_count, max_count)
                        true_unconstrained_lineage_leaf_labels = rng.sample(
                            lineage_leaf_labels, num_to_sample)
                    # Every leaf lineage that was not left unconstrained is
                    # treated as constrained. The unconstrained labels are
                    # collected into a set once so each membership test is a
                    # hash lookup rather than a scan of the whole list.
                    unconstrained_label_set = set(
                        true_unconstrained_lineage_leaf_labels)
                    true_constrained_lineage_leaf_labels = sorted(
                        leaf_node.taxon.label
                        for leaf_node in lineage_tree.leaf_node_iter()
                        if leaf_node.taxon.label not in unconstrained_label_set)
                    # Group the constrained lineage labels by the species each
                    # one maps to in `lineage_label_species_label_map`. The
                    # labels are visited in sorted order, so every per-species
                    # list is already sorted once it has been filled.
                    species_leafset_constraint_label_map = collections.OrderedDict()
                    for lineage_leaf_label in true_constrained_lineage_leaf_labels:
                        true_species_label = lineage_label_species_label_map[
                            lineage_leaf_label]
                        species_leafset_constraint_label_map.setdefault(
                            true_species_label, []).append(lineage_leaf_label)
                    # Emit one (copied, sorted) leaf set per species that has
                    # at least one constrained lineage, following the
                    # iteration order of `sorted_species_lineages_map`.
                    species_leafset_constraints = [
                        sorted(species_leafset_constraint_label_map[sp])
                        for sp in sorted_species_lineages_map
                        if sp in species_leafset_constraint_label_map
                    ]
                else:
                    assert args.constrain_partitions is None
                    assert args.num_unconstrained_leaves is None
                    assert args.min_unconstrained_leaves is None
                    assert args.max_unconstrained_leaves is None
                    true_constrained_lineage_leaf_labels = []
                    true_unconstrained_lineage_leaf_labels = [
                        lineage_tree_leaf_node.taxon.label
                        for lineage_tree_leaf_node in
                        lineage_tree.leaf_node_iter()
                    ]
                    species_leafset_constraint_label_map = {}
                    species_leafset_constraints = None
                # Keep the "species_leafset_constraints" entry of the run
                # configuration in step with the constraints computed above.
                if species_leafset_constraints is None:
                    assert args.constrain_partitions is None
                    # No constraints for this run: remove the entry if it is
                    # present; a missing key is not an error.
                    config.pop("species_leafset_constraints", None)
                else:
                    # this is actually used by the DELINEATE program
                    assert args.constrain_partitions is not None
                    config["species_leafset_constraints"] = (
                        species_leafset_constraints)

            # for post analysis assessment (not used by the inference program)
            #
            # The entries are written in a fixed order. The key
            # "species_partition_estimation_num_unconstrained_lineages" used
            # to be assigned twice in a row with the same value; it is now
            # written once.
            config["test_info"] = collections.OrderedDict([
                ("species_leafsets", true_species_leafsets),
                ("constrained_lineages",
                 sorted(true_constrained_lineage_leaf_labels)),
                ("unconstrained_lineages",
                 sorted(true_unconstrained_lineage_leaf_labels)),
                ("species_leafset_constraint_label_map",
                 species_leafset_constraint_label_map),
                ("species_partition_estimation_num_constrained_species",
                 len(species_leafset_constraint_label_map)),
                ("species_partition_estimation_num_constrained_lineages",
                 len(true_constrained_lineage_leaf_labels)),
                ("species_partition_estimation_num_unconstrained_lineages",
                 len(true_unconstrained_lineage_leaf_labels)),
                ("true_speciation_completion_rate",
                 true_speciation_completion_rate),  # not actually used?
                # Same object as "species_leafsets"; both keys are kept.
                ("true_species_leafsets", true_species_leafsets),
                ("true_num_species", len(true_species_leafsets)),
            ])
            # Persist the (now complete) run configuration for this entry.
            with open(entry["run_config_filepath"], "w") as dest:
                json.dump(config, dest, indent=2)
            # Build the job prefix from the tree file path. Only a trailing
            # ".nex" is stripped: str.replace() would also delete ".nex" from
            # anywhere earlier in the path (e.g. a directory name), producing
            # a prefix that points somewhere else.
            job_prefix = entry["tree_filepath"]
            if job_prefix.endswith(".nex"):
                job_prefix = job_prefix[:-len(".nex")]
            print(job_prefix)

            job_commands = []
            # Values shared by every job template for this entry.
            common_settings = {
                "run_config_filepath": entry["run_config_filepath"],
                "tree_filepath": entry["tree_filepath"],
                "num_lineages": len(entry["lineage_taxon_namespace"]),
                "num_species": len(entry["species_taxon_namespace"]),
                "true_speciation_completion_rate":
                true_speciation_completion_rate,
                "batch_id": batch_id,
            }
            # Files that the optional clean-up command removes.
            to_clean = [
                common_settings["run_config_filepath"],
                common_settings["tree_filepath"],
            ]
            underflow_protection = (
                "--underflow-protection" if args.underflow_protection else "")

            if args.test_type == speciation_completion_rate_test_type:
                job_kwargs = dict(common_settings)
                job_kwargs["results_filepath"] = (
                    job_prefix + ".speciation-rate.tsv")
                job_kwargs["underflow_protection"] = underflow_protection
                job_commands.append(
                    speciation_rate_estimation_job_template.format(
                        **job_kwargs))
            elif args.test_type in partition_test_types:
                job_kwargs = dict(common_settings)
                job_kwargs["underflow_protection"] = underflow_protection
                job_kwargs["delineate_results_filepath"] = (
                    job_prefix + ".delimitation-results.json")
                to_clean.append(job_kwargs["delineate_results_filepath"])
                # Pass the true rate on the command line only when requested;
                # otherwise the placeholder expands to nothing.
                if args.specify_true_speciation_completion_rate:
                    job_kwargs["speciation_completion_rate"] = (
                        "--speciation-completion-rate {}".format(
                            true_speciation_completion_rate))
                else:
                    job_kwargs["speciation_completion_rate"] = ""
                job_kwargs["post_analysis_performance_assessment_command"] = (
                    "python3 {}/evaluate-species-partition-estimation.py".format(
                        SCRIPT_DIR))
                job_commands.append(
                    species_partition_estimation_job_template.format(
                        **job_kwargs))
                # The assessment step is appended after the estimation step
                # and reuses the same keyword set plus its own results path.
                if args.test_type == joint_partition_prob_test_type:
                    job_kwargs[
                        "joint_performance_assessment_results_filepath"] = (
                            job_prefix + ".joint-partition-est-perf.tsv")
                    job_commands.append(
                        species_partition_estimation_joint_probability_analysis_template
                        .format(**job_kwargs))
                elif args.test_type == marginal_partition_prob_test_type:
                    job_kwargs[
                        "marginal_performance_assessment_results_filepath"] = (
                            job_prefix + ".marginal-partition-est-perf.tsv")
                    job_commands.append(
                        species_partition_estimation_marginal_probability_analysis_template
                        .format(**job_kwargs))

            job_filepath = job_prefix + ".job"
            if args.clean or args.very_clean:
                # NOTE(review): paths are joined unquoted, so a path containing
                # whitespace would break the generated `rm` line.
                clean_command = ["rm", "-f"] + to_clean
                if args.very_clean:
                    # "very clean" additionally removes the job file itself.
                    clean_command.append(job_filepath)
                job_commands.append(" ".join(clean_command))
            with open(job_filepath, "w") as dest:
                dest.write(
                    template.format(jobs="\n".join(job_commands),
                                    preamble=preamble))
        # Unless one of the clean-up modes was requested, write `data` as
        # indented JSON to "<output_prefix>.json".
        if not (args.clean or args.very_clean):
            with open(output_prefix + ".json", "w") as dest:
                json.dump(data, dest, indent=2)