Beispiel #1
0
 def test_reload_annotate(self):
     """
     Check that SLiM can read back tree sequences whose mutation metadata
     was rewritten with the legacy pyslim annotate helpers.
     """
     for ts, basename in self.get_slim_restarts():
         tables = ts.tables
         mut_metadata = list(pyslim.extract_mutation_metadata(tables))
         refseq = None
         # the first entry of the first metadata record decides whether the
         # example is handled as nucleotide-based
         if mut_metadata[0][0].nucleotide >= 0:
             nucs = [random.choice([0, 1, 2, 3]) for _ in mut_metadata]
             refseq = "".join(
                 random.choices(pyslim.NUCLEOTIDES,
                                k=int(ts.sequence_length)))
             # every entry of one record receives the same random nucleotide
             for nuc, record in zip(nucs, mut_metadata):
                 for entry in record:
                     entry.nucleotide = nuc
         # randomise the selection coefficient of every entry
         for record in mut_metadata:
             for entry in record:
                 entry.selection_coeff = random.random()
         pyslim.annotate_mutation_metadata(tables, mut_metadata)
         in_ts = pyslim.load_tables(tables, reference_sequence=refseq)
         # round-trip through SLiM (which just reads in and writes out)
         out_ts = self.run_slim_restart(in_ts, basename)
         # everything but the last provenance should be equal
         self.verify_slim_restart_equality(in_ts, out_ts)
Beispiel #2
0
    def _update_tables(self):
        """Relabel all nodes to population 0 and drop unconnected nodes.

        ``self.tree_sequence`` is replaced by a tree sequence rebuilt from a
        modified, simplified copy of its tables.
        """
        # work on a mutable tskit.TableCollection
        tables = self.tree_sequence.dump_tables()

        # Per the original author: SLiM records a null population (0) that
        # does not really exist next to the actual population (1), so every
        # node is assigned to population 0.
        num_nodes = tables.nodes.time.size
        tables.nodes.population = np.zeros(num_nodes, dtype=np.int32)

        # NOTE: halving the SLiM generation and the node/mutation times used
        # to be attempted at this point; that step is intentionally disabled.

        # Nodes that occur as a parent or a child of some edge. Any node not
        # listed here is unconnected; per the original author this includes
        # the pseudo-nodes representing half of the haploid populations.
        connected_nodes = list(
            set(tables.edges.parent).union(tables.edges.child))

        # simplify in place, using the connected nodes as the samples
        tables.simplify(
            samples=connected_nodes,
            keep_input_roots=True,
            filter_individuals=True,
            filter_populations=True,
            filter_sites=False,
        )

        # rebuild the tree sequence from the edited tables
        self.tree_sequence = pyslim.load_tables(tables)
Beispiel #3
0
 def test_annotate_individuals(self):
     """Randomise individual sexes via the legacy helpers and verify them."""
     for ts in self.get_msprime_examples():
         slim_ts = pyslim.annotate_defaults(
             ts, model_type="nonWF", slim_generation=1)
         tables = slim_ts.tables
         ind_metadata = list(pyslim.extract_individual_metadata(tables))
         self.assertEqual(len(ind_metadata), slim_ts.num_individuals)
         sex_options = [
             pyslim.INDIVIDUAL_TYPE_FEMALE, pyslim.INDIVIDUAL_TYPE_MALE
         ]
         sexes = [random.choice(sex_options) for _ in ind_metadata]
         for record, sex in zip(ind_metadata, sexes):
             record.sex = sex
         pyslim.annotate_individual_metadata(tables, ind_metadata)
         new_ts = pyslim.load_tables(tables)
         # the sexes written above must be read back unchanged
         for j, ind in enumerate(new_ts.individuals()):
             self.assertEqual(ind.metadata.sex, sexes[j])
         # try loading this into SLiM; the returned tree sequence is not
         # inspected here
         self.run_msprime_restart(new_ts, sex="A")
         self.verify_annotated_tables(new_ts, slim_ts)
         self.verify_annotated_trees(new_ts, slim_ts)
         self.verify_haplotype_equality(new_ts, slim_ts)
Beispiel #4
0
 def test_load(self, recipe):
     """Three ways of obtaining a SlimTreeSequence must agree with tskit."""
     path = recipe["path"]["ts"]

     # reference: load with plain tskit
     msp_ts = tskit.load(path)
     assert isinstance(msp_ts, tskit.TreeSequence)
     msp_tables = msp_ts.dump_tables()

     # route 1: hand the tables to pyslim
     from_tables = pyslim.load_tables(msp_tables)
     assert isinstance(from_tables, pyslim.SlimTreeSequence)
     self.verify_times(msp_ts, from_tables)
     self.assertTableCollectionsEqual(msp_tables, from_tables.dump_tables())

     # route 2: convert the tskit tree sequence directly
     converted = pyslim.SlimTreeSequence(msp_ts)
     assert isinstance(converted, pyslim.SlimTreeSequence)
     self.verify_times(msp_ts, converted)
     self.assertTableCollectionsEqual(msp_tables, converted.dump_tables())

     # route 3: let pyslim load the file itself
     slim_ts = pyslim.load(path)
     assert isinstance(slim_ts, pyslim.SlimTreeSequence)
     self.assertTableCollectionsEqual(msp_tables, slim_ts.dump_tables())
     file_generation = slim_ts.metadata['SLiM']['generation']
     converted_generation = converted.metadata['SLiM']['generation']
     assert file_generation == converted_generation
 def test_load(self):
     """Loading with legacy metadata must agree with the plain tskit load."""
     for _, info in self.get_slim_examples(return_info=True):
         path = info['basename'] + ".trees"

         # reference: load in msprime then switch
         msp_ts = tskit.load(path)
         self.assertIs(type(msp_ts), msprime.TreeSequence)
         msp_tables = msp_ts.tables

         # route 1: transfer tables
         from_tables = pyslim.load_tables(msp_tables, legacy_metadata=True)
         self.assertIsInstance(from_tables, pyslim.SlimTreeSequence)
         self.verify_times(msp_ts, from_tables)
         self.assertTableCollectionsEqual(msp_tables, from_tables.tables)

         # route 2: convert directly
         converted = pyslim.SlimTreeSequence(msp_ts)
         self.assertIs(type(converted), pyslim.SlimTreeSequence)
         self.verify_times(msp_ts, converted)
         self.assertTableCollectionsEqual(msp_tables, converted.tables)

         # route 3: load to pyslim from file
         slim_ts = pyslim.load(path, legacy_metadata=True)
         self.assertIs(type(slim_ts), pyslim.SlimTreeSequence)
         self.assertTableCollectionsEqual(msp_tables, slim_ts.tables)
         self.assertEqual(slim_ts.slim_generation,
                          converted.slim_generation)
Beispiel #6
0
 def test_reload_annotate(self):
     # Check that SLiM can load our files after the mutation metadata has
     # been rewritten through the table's metadata schema.
     for ts, basename in self.get_slim_restarts(no_op=True):
         tables = ts.tables
         mut_metadata = [mut.metadata for mut in tables.mutations]
         refseq = None
         if tables.metadata['SLiM']['nucleotide_based']:
             nucs = [random.choice([0, 1, 2, 3]) for _ in mut_metadata]
             refseq = "".join(
                 random.choices(pyslim.NUCLEOTIDES,
                                k=int(ts.sequence_length)))
             # all entries in one row's mutation list share a nucleotide
             for nuc, row in zip(nucs, mut_metadata):
                 for entry in row['mutation_list']:
                     entry["nucleotide"] = nuc
         # randomise every selection coefficient
         for row in mut_metadata:
             for entry in row['mutation_list']:
                 entry["selection_coeff"] = random.random()
         schema = tables.mutations.metadata_schema
         encoded_rows = [
             schema.validate_and_encode_row(row) for row in mut_metadata
         ]
         tables.mutations.packset_metadata(encoded_rows)
         in_ts = pyslim.load_tables(tables, reference_sequence=refseq)
         # round-trip through SLiM (which just reads in and writes out)
         out_ts = self.run_slim_restart(in_ts, basename)
         # everything but the last provenance should be equal
         self.verify_slim_restart_equality(in_ts, out_ts)
Beispiel #7
0
 def test_annotate_individuals(self):
     """Randomise individual sexes via the metadata schema and reload."""
     for ts in self.get_msprime_examples():
         slim_ts = pyslim.annotate_defaults(
             ts, model_type="nonWF", slim_generation=1)
         tables = slim_ts.tables

         # switch the top-level metadata to a model with separate sexes
         top_md = tables.metadata
         top_md['SLiM']['separate_sexes'] = True
         tables.metadata = top_md

         # give each individual a random sex and re-encode the column
         ind_metadata = [ind.metadata for ind in tables.individuals]
         sex_options = [
             pyslim.INDIVIDUAL_TYPE_FEMALE, pyslim.INDIVIDUAL_TYPE_MALE
         ]
         sexes = [random.choice(sex_options) for _ in ind_metadata]
         for row, sex in zip(ind_metadata, sexes):
             row["sex"] = sex
         ind_schema = tables.individuals.metadata_schema
         tables.individuals.packset_metadata(
             [ind_schema.validate_and_encode_row(row)
              for row in ind_metadata])

         # nonWF models always have this
         pop_metadata = [pop.metadata for pop in tables.populations]
         for row in pop_metadata:
             row['sex_ratio'] = 0.0
         pop_schema = tables.populations.metadata_schema
         tables.populations.packset_metadata(
             [pop_schema.validate_and_encode_row(row)
              for row in pop_metadata])

         new_ts = pyslim.load_tables(tables)
         for j, ind in enumerate(new_ts.individuals()):
             self.assertEqual(ind.metadata["sex"], sexes[j])
         self.verify_annotated_tables(new_ts, slim_ts)
         self.verify_annotated_trees(new_ts, slim_ts)
         self.verify_haplotype_equality(new_ts, slim_ts)
         # try loading this into SLiM
         loaded_ts = self.run_msprime_restart(new_ts, sex="A")
         self.verify_trees_equal(new_ts, loaded_ts)
Beispiel #8
0
 def test_recover_metadata(self):
     # msprime <=0.7.5 discards metadata, but we can recover it from
     # provenance: blank out the top-level metadata and schema, reload, and
     # expect the original metadata back.
     for ts in self.get_slim_examples():
         stripped = ts.tables
         stripped.metadata_schema = tskit.MetadataSchema(None)
         stripped.metadata = b''
         recovered_ts = pyslim.load_tables(stripped)
         self.assertEqual(recovered_ts.metadata, ts.metadata)
Beispiel #9
0
 def test_load_tables(self):
     """A tables -> load_tables round trip must leave the tables unchanged."""
     for ts in self.get_slim_examples():
         self.assertIsInstance(ts, pyslim.SlimTreeSequence)
         original_tables = ts.tables
         reloaded_ts = pyslim.load_tables(original_tables)
         self.assertIsInstance(reloaded_ts, pyslim.SlimTreeSequence)
         self.assertEqual(original_tables, reloaded_ts.tables)
Beispiel #10
0
 def test_load_tables(self, recipe):
     """A dump_tables -> load_tables round trip must preserve the tables."""
     ts = recipe["ts"]
     assert isinstance(ts, pyslim.SlimTreeSequence)
     original_tables = ts.dump_tables()
     reloaded_ts = pyslim.load_tables(original_tables)
     assert isinstance(reloaded_ts, pyslim.SlimTreeSequence)
     reloaded_tables = reloaded_ts.dump_tables()
     assert original_tables == reloaded_tables
Beispiel #11
0
 def test_recover_metadata(self, recipe):
     # msprime <=0.7.5 discards metadata, but we can recover it from
     # provenance: blank out the top-level metadata and schema, reload, and
     # expect the original metadata back.
     ts = recipe["ts"]
     stripped = ts.dump_tables()
     stripped.metadata_schema = tskit.MetadataSchema(None)
     stripped.metadata = b''
     recovered_ts = pyslim.load_tables(stripped)
     assert recovered_ts.metadata == ts.metadata
 def test_load_tables(self):
     """Round trip through load_tables with legacy metadata enabled."""
     for ts in self.get_slim_examples():
         self.assertIs(type(ts), pyslim.SlimTreeSequence)
         original_tables = ts.tables
         reloaded_ts = pyslim.load_tables(original_tables,
                                          legacy_metadata=True)
         self.assertIs(type(reloaded_ts), pyslim.SlimTreeSequence)
         self.assertEqual(original_tables, reloaded_ts.tables)
Beispiel #13
0
 def test_annotate_XY(self):
     """Annotate sexes and X/Y genome types via the schemas, then reload."""
     random.seed(8)
     for ts in self.get_msprime_examples():
         for genome_type in ["X", "Y"]:
             slim_ts = pyslim.annotate_defaults(
                 ts, model_type="nonWF", slim_generation=1)
             tables = slim_ts.tables

             # switch the top-level metadata to a model with separate sexes
             top_md = tables.metadata
             top_md['SLiM']['separate_sexes'] = True
             tables.metadata = top_md

             # give each individual a random sex and re-encode the column
             ind_metadata = [ind.metadata for ind in tables.individuals]
             sex_options = [
                 pyslim.INDIVIDUAL_TYPE_FEMALE,
                 pyslim.INDIVIDUAL_TYPE_MALE,
             ]
             sexes = [random.choice(sex_options) for _ in ind_metadata]
             for row, sex in zip(ind_metadata, sexes):
                 row["sex"] = sex
             ind_schema = tables.individuals.metadata_schema
             tables.individuals.packset_metadata(
                 [ind_schema.validate_and_encode_row(row)
                  for row in ind_metadata])

             # A genome is flagged null when its type differs from the
             # chromosome type under test.
             x_is_null = (genome_type != "X")
             y_is_null = (genome_type != "Y")
             node_metadata = [node.metadata for node in tables.nodes]
             for j in range(slim_ts.num_individuals):
                 nodes = slim_ts.individual(j).nodes
                 # the first genome is always an X
                 first_md = node_metadata[nodes[0]]
                 first_md["genome_type"] = pyslim.GENOME_TYPE_X
                 first_md["is_null"] = x_is_null
                 # the second genome is a Y for males, an X otherwise
                 second_md = node_metadata[nodes[1]]
                 if sexes[j] == pyslim.INDIVIDUAL_TYPE_MALE:
                     second_md["genome_type"] = pyslim.GENOME_TYPE_Y
                     second_md["is_null"] = y_is_null
                 else:
                     second_md["genome_type"] = pyslim.GENOME_TYPE_X
                     second_md["is_null"] = x_is_null
             node_schema = tables.nodes.metadata_schema
             tables.nodes.packset_metadata(
                 [node_schema.validate_and_encode_row(row)
                  for row in node_metadata])

             # nonWF models always have this
             pop_metadata = [pop.metadata for pop in tables.populations]
             for row in pop_metadata:
                 row['sex_ratio'] = 0.0
             pop_schema = tables.populations.metadata_schema
             tables.populations.packset_metadata(
                 [pop_schema.validate_and_encode_row(row)
                  for row in pop_metadata])

             new_ts = pyslim.load_tables(tables)
             self.verify_annotated_tables(new_ts, slim_ts)
             self.verify_annotated_trees(new_ts, slim_ts)
             self.verify_haplotype_equality(new_ts, slim_ts)
             # try loading this into SLiM
             loaded_ts = self.run_msprime_restart(new_ts, sex=genome_type)
             self.verify_trees_equal(new_ts, loaded_ts)
Beispiel #14
0
    def __init__(
            self,
            files: list = None,  # takes exactly two files (for now)
            simlength: int = None,  # length in generations
            popsize: int = None,  # initial size of each population
            recomb: float = None,  # recombination rate
            mutrate: float = None,  # mutation rate
            chromosome=None,  # 'shadie.chromosome.ChromosomeBase'
            altgen: bool = True,  # is model altgen or not?
    ):
        """
        Load SLiM .trees files and collapse each onto a single population.

        Each file is loaded with pyslim, all of its nodes are relabelled to
        population 0, and the result is simplified. The simplified tree
        sequences are stored, in input order, in ``self.onepop_tslist``.
        The original summary of this class also mentions merging,
        recapitating and overlaying neutral mutations; none of that happens
        in this method.

        Parameters
        ----------
        files : paths of the SLiM .trees files to load.
        simlength : stored unchanged as ``self.simlength``.
        popsize : stored unchanged as ``self.popsize``.
        recomb : stored unchanged as ``self.recomb``.
        mutrate : stored as ``self.mutrate``; halved when ``altgen is True``.
            ``None`` is stored as ``None``.
        chromosome : stored unchanged as ``self.chromosome``.
        altgen : when exactly ``True``, ``mutrate`` is halved.

        Raises
        ------
        TypeError : if ``files`` is None.
        """
        if files is None:
            raise TypeError(
                "files must be a list of SLiM .trees file paths, not None")

        # Only halve a mutation rate that was actually supplied; dividing
        # the default None would raise an opaque TypeError.
        if altgen is True and mutrate is not None:
            self.mutrate = mutrate / 2
        else:
            self.mutrate = mutrate

        self.chromosome = chromosome
        self.simlength = simlength
        self.recomb = recomb
        self.popsize = popsize
        self.pops = None

        # NOTE(review): this attribute is left empty here; the loaded tree
        # sequences only live in the local list below. Confirm whether it
        # is meant to be filled elsewhere.
        self.species = []

        # read in all the tree sequences
        species = [pyslim.load(path) for path in files]

        # merge the p0 and p1 populations by relabelling every node to 0
        mod_tslist = []
        for ts in species:
            tables = ts.tables
            tables.nodes.population = np.zeros(tables.nodes.num_rows,
                                               dtype=np.int32)
            mod_tslist.append(pyslim.load_tables(tables))

        # remove the extra population
        self.onepop_tslist = [
            ts.simplify(keep_input_roots=True,
                        keep_unary_in_individuals=True)
            for ts in mod_tslist
        ]
Beispiel #15
0
 def test_annotate_mutations(self):
     """Write random selection coefficients (legacy API) and read them back."""
     for ts in get_msprime_examples():
         slim_ts = pyslim.annotate_defaults(
             ts, model_type="nonWF", slim_generation=1)
         tables = slim_ts.tables
         mut_metadata = list(pyslim.extract_mutation_metadata(tables))
         self.assertEqual(len(mut_metadata), slim_ts.num_mutations)
         selcoefs = [random.uniform(0, 1) for _ in mut_metadata]
         for record, coef in zip(mut_metadata, selcoefs):
             record.selection_coeff = coef
         pyslim.annotate_mutation_metadata(tables, mut_metadata)
         new_ts = pyslim.load_tables(tables)
         # decode each stored record and compare with what was written
         for j, mut in enumerate(new_ts.mutations()):
             decoded = pyslim.decode_mutation(mut.metadata)
             self.assertEqual(decoded.selection_coeff, selcoefs[j])
Beispiel #16
0
 def test_annotate_individuals(self):
     """Write random sexes (legacy API) and read them back after decoding."""
     for ts in get_msprime_examples():
         slim_ts = pyslim.annotate_defaults(
             ts, model_type="nonWF", slim_generation=1)
         tables = slim_ts.tables
         ind_metadata = list(pyslim.extract_individual_metadata(tables))
         self.assertEqual(len(ind_metadata), slim_ts.num_individuals)
         sex_options = [
             pyslim.INDIVIDUAL_TYPE_FEMALE, pyslim.INDIVIDUAL_TYPE_MALE
         ]
         sexes = [random.choice(sex_options) for _ in ind_metadata]
         for record, sex in zip(ind_metadata, sexes):
             record.sex = sex
         pyslim.annotate_individual_metadata(tables, ind_metadata)
         new_ts = pyslim.load_tables(tables)
         # decode each stored record and compare with what was written
         for j, ind in enumerate(new_ts.individuals()):
             decoded = pyslim.decode_individual(ind.metadata)
             self.assertEqual(decoded.sex, sexes[j])
Beispiel #17
0
 def test_annotate_mutations(self):
     """Write random selection coefficients via the schema and read back."""
     for ts in self.get_msprime_examples():
         slim_ts = pyslim.annotate_defaults(
             ts, model_type="nonWF", slim_generation=1)
         tables = slim_ts.tables
         mut_metadata = [mut.metadata for mut in tables.mutations]
         selcoefs = [random.uniform(0, 1) for _ in mut_metadata]
         # only the first entry of each row's mutation list is modified
         for row, coef in zip(mut_metadata, selcoefs):
             row['mutation_list'][0]["selection_coeff"] = coef
         schema = tables.mutations.metadata_schema
         encoded_rows = [
             schema.validate_and_encode_row(row) for row in mut_metadata
         ]
         tables.mutations.packset_metadata(encoded_rows)
         new_ts = pyslim.load_tables(tables)
         for j, mut in enumerate(new_ts.mutations()):
             stored = mut.metadata['mutation_list'][0]["selection_coeff"]
             self.assertEqual(stored, selcoefs[j])
Beispiel #18
0
 def test_annotate_nodes(self):
     """Write random genome types (legacy API) and read them back."""
     for ts in get_msprime_examples():
         slim_ts = pyslim.annotate_defaults(
             ts, model_type="nonWF", slim_generation=1)
         tables = slim_ts.tables
         node_metadata = list(pyslim.extract_node_metadata(tables))
         self.assertEqual(len(node_metadata), slim_ts.num_nodes)
         type_options = [pyslim.GENOME_TYPE_X, pyslim.GENOME_TYPE_Y]
         gtypes = [random.choice(type_options) for _ in node_metadata]
         # nodes without metadata (None) are left untouched
         for record, gtype in zip(node_metadata, gtypes):
             if record is not None:
                 record.genome_type = gtype
         pyslim.annotate_node_metadata(tables, node_metadata)
         new_ts = pyslim.load_tables(tables)
         for j, node in enumerate(new_ts.nodes()):
             decoded = pyslim.decode_node(node.metadata)
             if decoded is None:
                 continue
             self.assertEqual(decoded.genome_type, gtypes[j])
Beispiel #19
0
 def test_annotate_nodes(self):
     """Write random genome types via the node schema and read them back."""
     for ts in self.get_msprime_examples():
         slim_ts = pyslim.annotate_defaults(
             ts, model_type="nonWF", slim_generation=1)
         tables = slim_ts.tables
         node_metadata = [node.metadata for node in tables.nodes]
         type_options = [pyslim.GENOME_TYPE_X, pyslim.GENOME_TYPE_Y]
         gtypes = [random.choice(type_options) for _ in node_metadata]
         # nodes without metadata (None) are left untouched
         for row, gtype in zip(node_metadata, gtypes):
             if row is None:
                 continue
             row["genome_type"] = gtype
         schema = tables.nodes.metadata_schema
         encoded_rows = [
             schema.validate_and_encode_row(row) for row in node_metadata
         ]
         tables.nodes.packset_metadata(encoded_rows)
         new_ts = pyslim.load_tables(tables)
         for node, gtype in zip(new_ts.nodes(), gtypes):
             if node.metadata is None:
                 continue
             self.assertEqual(node.metadata["genome_type"], gtype)
Beispiel #20
0
    def _remove_null_population_and_nodes(self):
        """Relabel every node to population 0 and simplify each tree sequence.

        Every entry of ``self._tree_sequences`` is replaced by a simplified
        copy whose samples are the nodes referenced by its edge table.

        Per the original author's notes, shadie simulations alternate
        between two subpopulations, which leaves a null population in the
        recorded tree sequence; nodes from the diploid sub-generation are
        labelled population 1 before this method runs.
        """
        for position, original_ts in enumerate(self._tree_sequences):
            # edit a copy of the tables; a new tree sequence is built from
            # the edited copy further down
            tables = original_ts.tables

            # assign population 0 to all nodes
            tables.nodes.population = np.zeros(
                tables.nodes.num_rows, dtype=np.int32)

            # NOTE: halving the SLiM generation and the node/mutation times
            # used to be attempted here; that step is intentionally disabled.

            # Nodes that occur as a parent or a child of some edge. Any node
            # not listed here is unconnected; per the original author this
            # includes the pseudo-nodes representing half of the haploid
            # populations.
            connected_nodes = list(
                set(tables.edges.parent).union(tables.edges.child))

            # reload a tree sequence FROM the modified tables
            relabelled_ts = pyslim.load_tables(tables)

            # simplify with the connected nodes as samples; the intent is to
            # drop the now-empty population (p1)
            # https://tskit.dev/tskit/docs/stable/_modules/tskit/tables.html
            simplified_ts = relabelled_ts.simplify(
                samples=connected_nodes,
                keep_input_roots=True,
                keep_unary_in_individuals=True)
            self._tree_sequences[position] = simplified_ts
Beispiel #21
0
 def test_load(self):
     """Three ways of obtaining a SlimTreeSequence must agree with tskit."""
     for path in self.get_slim_example_files():
         # reference: load in msprime then switch
         msp_ts = tskit.load(path)
         self.assertIs(type(msp_ts), msprime.TreeSequence)
         msp_tables = msp_ts.tables

         # route 1: transfer tables
         from_tables = pyslim.load_tables(msp_tables)
         self.assertIs(type(from_tables), pyslim.SlimTreeSequence)
         self.verify_times(msp_ts, from_tables)
         self.assertEqual(msp_tables, from_tables.tables)

         # route 2: convert directly
         converted = pyslim.SlimTreeSequence(msp_ts)
         self.assertIs(type(converted), pyslim.SlimTreeSequence)
         self.verify_times(msp_ts, converted)
         self.assertEqual(msp_tables, converted.tables)

         # route 3: load to pyslim from file
         slim_ts = pyslim.load(path)
         self.assertIs(type(slim_ts), pyslim.SlimTreeSequence)
         self.assertEqual(msp_tables, slim_ts.tables)
         self.assertEqual(slim_ts.slim_generation,
                          converted.slim_generation)
Beispiel #22
0
    def _update_tables(self):
        """DEPRECATED: relabel populations, halve times, then simplify.

        Alternative approach to removing the null population and dividing
        time by two. According to the original author this did not work:
        it still squishes edges.

        For every entry of ``self._tree_sequences`` this sets all node
        populations to 0, halves the SLiM generation, node times and
        mutation times, and stores a simplified tree sequence back into
        the list.

        Suggested next thing to try:
        https://github.com/tskit-dev/pyslim/blob/625295ba6b4ae8e8400953be65b03b3630c1430f/docs/vignette_continuing.md#continuing-the-simulation
        """
        for idx, treeseq in enumerate(self._tree_sequences):

            # get mutable tskit.TableCollection
            tables = treeseq.dump_tables()
            nnodes = tables.nodes.time.size

            # Per the original author: there is a null SLiM population (0)
            # that doesn't really exist and the actual population (1), so
            # all nodes are set to 0.
            tables.nodes.population = np.zeros(nnodes, dtype=np.int32)
            # halve the recorded SLiM generation (truncated to an int) and
            # write the modified metadata back to the tables
            meta = tables.metadata
            meta["SLiM"]["generation"] = int(meta["SLiM"]["generation"] / 2.)
            tables.metadata = meta
            # halve node and mutation times to match the halved generation
            tables.nodes.time /= 2
            tables.mutations.time /= 2.

            # turn it back into a tree sequence
            treeseq = pyslim.load_tables(tables)  #.tree_sequence()

            # Nodes that occur as a parent or a child of some edge. Nodes
            # not listed are unconnected; per the original author this
            # includes the pseudo-nodes representing half of the haploid
            # populations.
            nodes_in_edge_table = list(
                set(tables.edges.parent).union(tables.edges.child))

            # simplify with the connected nodes as samples; the intent is to
            # remove the empty population
            # https://tskit.dev/tskit/docs/stable/_modules/tskit/tables.html
            self._tree_sequences[idx] = treeseq.simplify(
                samples=nodes_in_edge_table,
                keep_input_roots=True,
                keep_unary_in_individuals=True)
Beispiel #23
0
 def test_annotate_XY(self):
     """Annotate sexes and X/Y genome types with the legacy helpers."""
     for ts in self.get_msprime_examples():
         for genome_type in ["X", "Y"]:
             slim_ts = pyslim.annotate_defaults(
                 ts, model_type="nonWF", slim_generation=1)
             tables = slim_ts.tables

             # give each individual a random sex
             ind_metadata = list(pyslim.extract_individual_metadata(tables))
             self.assertEqual(len(ind_metadata), slim_ts.num_individuals)
             sex_options = [
                 pyslim.INDIVIDUAL_TYPE_FEMALE,
                 pyslim.INDIVIDUAL_TYPE_MALE,
             ]
             sexes = [random.choice(sex_options) for _ in ind_metadata]
             for record, sex in zip(ind_metadata, sexes):
                 record.sex = sex
             pyslim.annotate_individual_metadata(tables, ind_metadata)

             # A genome is flagged null when its type differs from the
             # chromosome type under test.
             x_is_null = (genome_type != "X")
             y_is_null = (genome_type != "Y")
             node_metadata = list(pyslim.extract_node_metadata(tables))
             self.assertEqual(len(node_metadata), slim_ts.num_nodes)
             for j in range(slim_ts.num_individuals):
                 nodes = slim_ts.individual(j).nodes
                 # the first genome is always an X
                 first_md = node_metadata[nodes[0]]
                 first_md.genome_type = pyslim.GENOME_TYPE_X
                 first_md.is_null = x_is_null
                 # the second genome is a Y for males, an X otherwise
                 second_md = node_metadata[nodes[1]]
                 if sexes[j] == pyslim.INDIVIDUAL_TYPE_MALE:
                     second_md.genome_type = pyslim.GENOME_TYPE_Y
                     second_md.is_null = y_is_null
                 else:
                     second_md.genome_type = pyslim.GENOME_TYPE_X
                     second_md.is_null = x_is_null
             pyslim.annotate_node_metadata(tables, node_metadata)

             new_ts = pyslim.load_tables(tables)
             # try loading this into SLiM
             loaded_ts = self.run_msprime_restart(new_ts, sex=genome_type)
             self.verify_annotated_tables(new_ts, slim_ts)
             self.verify_annotated_trees(new_ts, slim_ts)
             self.verify_haplotype_equality(new_ts, slim_ts)
Beispiel #24
0
def throw_mut_on_tree(ts):
    """Throw a single neutral mutation onto an unmutated tree sequence.

    Per the original author, the mutation represents the standing variant
    in a sweep that starts immediately after burn-in.

    A tree is drawn with probability proportional to its total branch
    length times the number of integer positions it spans. A branch of that
    tree is then drawn proportionally to branch length, and redrawn until
    the fraction of ``n`` samples below it is at least ``args.c``. Finally
    an integer position inside the tree's interval is drawn.

    Reads the module-level ``args`` (``af``, ``Ne``, ``c``, ``q``).

    Parameters
    ----------
    ts : tree sequence without any sites (checked with an assertion).

    Returns
    -------
    tuple ``(mut_base, freq, mut_ts)``: the chosen position (a length-1
    array), the sum of the genotype matrix divided by ``n``, and the tree
    sequence with the new site and mutation added.
    """
    # denominator used for frequencies: args.Ne unless args.af is set
    n = 2 * 14474 if args.af else args.Ne
    c = args.c

    # weight of each tree: total branch length times the integer extent
    tree_sizes = np.array([
        t.total_branch_length *
        (np.ceil(t.interval[1]) - np.ceil(t.interval[0])) for t in ts.trees()
    ])
    tree_sizes /= sum(tree_sizes)

    # pick the tree and advance the iterator until it is reached
    tree_index = np.random.choice(ts.num_trees, size=1, p=tree_sizes)
    for (i, t) in enumerate(ts.trees()):
        if i == tree_index:
            break

    assert (t.index == tree_index)

    # pick the branch; retry until it carries a large enough sample fraction
    cpicked = -1
    while cpicked < c:
        treeloc = t.total_branch_length * np.random.uniform()
        for mut_n in t.nodes():
            if mut_n != t.root:
                treeloc -= t.branch_length(mut_n)
                if treeloc <= 0:
                    cpicked = t.num_samples(mut_n) / (n)
                    break

    # pick the location on the sequence
    mut_base = 0.0 + np.random.randint(
        low=np.ceil(t.interval[0]), high=np.ceil(t.interval[1]), size=1)

    # the following assumes that there's no other mutations in the tree sequence
    assert (ts.num_sites == 0)

    # the mutation metadata
    mut_md = pyslim.MutationMetadata(mutation_type=1,
                                     selection_coeff=0.0,
                                     population=1,
                                     slim_time=1)

    tables = ts.tables
    site_id = tables.sites.add_row(position=mut_base, ancestral_state=b'')
    tables.mutations.add_row(site=site_id,
                             node=mut_n,
                             derived_state='1',
                             metadata=pyslim.encode_mutation([mut_md]))

    mut_ts = pyslim.load_tables(tables)

    # compute the genotype matrix once and reuse it for output and frequency
    genotypes = mut_ts.genotype_matrix()
    derived_count = np.sum(genotypes)
    if not args.q:
        print(genotypes)
        print('%d / %d' % (derived_count, n))
    freq = derived_count / (n)

    return mut_base, freq, mut_ts
Beispiel #25
0
    [ims.validate_and_encode_row(md) for md in individual_metadata])

# add selected mutation

# choose a random individual, then one of the nodes that belong to it
mut_ind_id = random.choice(range(tables.individuals.num_rows))
candidate_nodes = np.where(tables.nodes.individual == mut_ind_id)[0]
mut_node_id = random.choice(candidate_nodes)
mut_node = tables.nodes[mut_node_id]

# metadata for the single mutation entry; slim_time is the SLiM generation
# minus the node's time, truncated to an int
slim_generation = tables.metadata['SLiM']['generation']
mutation_entry = {
    "mutation_type": 2,
    "selection_coeff": 0.1,
    "subpopulation": mut_node.population,
    "slim_time": int(slim_generation - mut_node.time),
    "nucleotide": -1,
}
mut_metadata = {"mutation_list": [mutation_entry]}

# one new site at position 5000 carrying the mutation on the chosen node
site_num = tables.sites.add_row(position=5000, ancestral_state='')
tables.mutations.add_row(
    node=mut_node_id,
    site=site_num,
    derived_state='1',
    time=mut_node.time,
    metadata=mut_metadata,
)

slim_ts = pyslim.load_tables(tables)
slim_ts.dump("recipe_17.9.trees")
# Keywords: Python, nonWF, non-Wright-Fisher, tree-sequence recording, tree sequence recording

import msprime, pyslim, random

# simulate without mutations
ts = msprime.simulate(
    sample_size=10000,
    Ne=5000,
    length=1e8,
    mutation_rate=0.0,
    recombination_rate=1e-8,
)

# add default SLiM annotation to a mutable copy of the tables
tables = ts.dump_tables()
pyslim.annotate_defaults_tables(tables, model_type="nonWF", slim_generation=1)

# give every individual a random sex and a random age between 0 and 4
individual_metadata = list(pyslim.extract_individual_metadata(tables))
sex_options = [pyslim.INDIVIDUAL_TYPE_FEMALE, pyslim.INDIVIDUAL_TYPE_MALE]
for ind_md in individual_metadata:
    ind_md.sex = random.choice(sex_options)
    ind_md.age = random.choice([0, 1, 2, 3, 4])

pyslim.annotate_individual_metadata(tables, individual_metadata)
slim_ts = pyslim.load_tables(tables)
slim_ts.dump("recipe_17.9.trees")