Esempio n. 1
0
 def test_node_times_stable(self):
     """Check that simplify() leaves the times of the sample nodes unchanged.

     Builds a recorder from a three-node tree sequence (a, b, c), records
     a fixed series of events through ``self.f``, and compares the time
     stored for every ID in ``self.sample_input_ids`` before and after
     calling ``simplify``.
     """
     # Initial tree sequence containing only a, b and c.
     node_text = six.StringIO("""\
     id      is_sample   population      time
     0       0           -1              1.00000000000000
     1       1           -1              0.00000000000000
     2       1           -1              0.00000000000000
     """)
     edge_text = six.StringIO("""\
     id      left            right           parent  child
     0       0.00000000      1.00000000      0       1
     1       0.00000000      1.00000000      0       2
     """)
     init_ts = msprime.load_text(nodes=node_text, edges=edge_text,
                                 strict=False)
     first_gen = {self.ids['a']: 0, self.ids['b']: 1, self.ids['c']: 2}
     arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)

     # Arguments forwarded verbatim to self.f, in recording order.
     events = [
         ('b', 'a', 0.9, 'd', 2.0),
         ('a', 'c', 0.1, 'e', 2.0),
         ('d', 'e', 0.7, 'f', 3.0),
         ('f', 'd', 0.8, 'g', 4.0),
         ('e', 'f', 0.2, 'h', 4.0),
         ('b', 'g', 0.6, 'i', 5.0),
         ('g', 'h', 0.5, 'j', 5.0),
         ('c', 'h', 0.4, 'k', 5.0),
     ]
     for event in events:
         self.f(arg, *event)
     arg.update_times()

     def snapshot_times():
         # Map every input ID currently known to the recorder to its time.
         return {u: arg.nodes.time[arg.node_ids[u]] for u in arg.node_ids}

     times_before = snapshot_times()
     print(arg)
     arg.simplify(self.sample_input_ids)
     print(arg)
     times_after = snapshot_times()

     for sample in self.sample_input_ids:
         self.assertEqual(times_before[sample], times_after[sample])
Esempio n. 2
0
    def test_intermediate_simplify(self):
        """Check that simplifying part-way through recording is harmless.

        Records the first four events, simplifies down to the IDs of
        b, c, e, f and g, records the remaining four events, and then
        compares the resulting tree sequence with ``self.true_tss`` via
        ``self.check_trees``.
        """
        # build initial tree sequence with just a, b, c
        nodes = six.StringIO("""\
        id      is_sample   population      time
        0       0           -1              1.00000000000000
        1       1           -1              0.00000000000000
        2       1           -1              0.00000000000000
        """)
        # One row per (parent, child) pair under a `child` header, the same
        # layout the sibling tests in this file pass to
        # msprime.load_text(edges=...); the previous `children` header with
        # a comma-joined "1,2" row was the older edgeset layout.
        edges = six.StringIO("""\
        id      left            right           parent  child
        0       0.00000000      1.00000000      0       1
        1       0.00000000      1.00000000      0       2
        """)
        init_ts = msprime.load_text(nodes=nodes, edges=edges, strict=False)

        first_gen = {self.ids[k]: v for k, v in [('a', 0), ('b', 1), ('c', 2)]}
        arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)
        self.f(arg, 'b', 'a', 0.9, 'd', 2.0)
        self.f(arg, 'a', 'c', 0.1, 'e', 2.0)
        self.f(arg, 'd', 'e', 0.7, 'f', 3.0)
        self.f(arg, 'f', 'd', 0.8, 'g', 4.0)
        # simplify, keeping every individual that is used as a parent below
        print(arg)
        arg.simplify(samples=[self.ids[u] for u in ['b', 'c', 'e', 'f', 'g']])
        print(arg)
        self.f(arg, 'e', 'f', 0.2, 'h', 4.0)
        self.f(arg, 'b', 'g', 0.6, 'i', 5.0)
        self.f(arg, 'g', 'h', 0.5, 'j', 5.0)
        self.f(arg, 'c', 'h', 0.4, 'k', 5.0)
        print(arg)
        tss = arg.tree_sequence(self.sample_input_ids)
        self.check_trees(tss, self.true_tss)
Esempio n. 3
0
 def test_add_individual(self):
     """Check that add_individual appends one node with the given values.

     Adds input ID 5 at time 2.0 in population 2, verifies the node table
     grew by one row (to 4 rows) and stores the requested time and
     population, then verifies that add_individual(1, 1.5) raises
     ValueError.
     """
     recorder = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
     recorder.add_individual(5, 2.0, population=2)

     node_table = recorder.nodes
     self.assertEqual(node_table.num_rows, self.init_ts.num_nodes + 1)
     self.assertEqual(node_table.num_rows, 4)

     self.assertEqual(recorder.nodes.time[recorder.node_ids[5]], 2.0)
     self.assertEqual(recorder.nodes.population[recorder.node_ids[5]], 2)

     with self.assertRaises(ValueError):
         recorder.add_individual(1, 1.5)
Esempio n. 4
0
 def test_init(self):
     """Check that a new ARGrecorder mirrors the initial tree sequence.

     For every input ID in ``self.init_map`` the recorder must report the
     same node ID and the same node time as ``self.init_ts``; the edge
     table must have as many rows as ``self.init_ts`` has edges.
     """
     records = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
     for input_id in self.init_map:
         node_id = self.init_map[input_id]
         self.assertEqual(records.nodes.time[node_id],
                          self.init_ts.node(node_id).time)
         self.assertEqual(records.node_ids[input_id], node_id)
     # The edge count does not depend on the node being inspected, so it is
     # checked once, outside the loop; this also keeps the check active
     # when init_map is empty.
     self.assertEqual(records.edges.num_rows, self.init_ts.num_edges)
Esempio n. 5
0
    def test_build_ts(self):
        """Record a small fixed pedigree and check the resulting tables.

        Starting from a tree sequence holding only a, b and c, the events
        below are recorded through ``self.f``, the IDs in
        ``self.sample_input_ids`` are marked as samples, and times are
        updated.  The test then checks the node count, ``max_time``, each
        node's time and sample flag, and finally compares the tree
        sequence with ``self.true_tss``.
        """
        # Initial tree sequence with just a, b, c.  Individual `a` and the
        # births of `b` and `c` are therefore already present and are not
        # recorded again through the recorder.
        node_text = six.StringIO("""\
        id      is_sample   population      time
        0       0           -1              1.00000000000000
        1       1           -1              0.00000000000000
        2       1           -1              0.00000000000000
        """)
        edge_text = six.StringIO("""\
        id      left            right           parent  child
        0       0.00000000      1.00000000      0       1
        1       0.00000000      1.00000000      0       2
        """)
        init_ts = msprime.load_text(nodes=node_text, edges=edge_text,
                                    strict=False)

        first_gen = {self.ids[k]: v for k, v in [('a', 0), ('b', 1), ('c', 2)]}
        arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)

        # Notation: `(x,y,r)->z at t` is recorded as self.f(arg, x, y, r, z, t).
        births = [
            # `(b,a,0.9)->d` and `(a,c,0.1)->e`, then `a` dies, at `t=2`
            ('b', 'a', 0.9, 'd', 2.0),
            ('a', 'c', 0.1, 'e', 2.0),
            # `(d,e,0.7)->f` at `t=3`
            ('d', 'e', 0.7, 'f', 3.0),
            # `(f,d,0.8)->g` and `(e,f,0.2)->h` at `t=4`
            ('f', 'd', 0.8, 'g', 4.0),
            ('e', 'f', 0.2, 'h', 4.0),
            # `(b,g,0.6)->i`, `(g,h,0.5)->j` and `(c,h,0.4)->k` at `t=5`
            ('b', 'g', 0.6, 'i', 5.0),
            ('g', 'h', 0.5, 'j', 5.0),
            ('c', 'h', 0.4, 'k', 5.0),
        ]
        for birth in births:
            self.f(arg, *birth)

        # Finally the sampled individuals are marked and times refreshed.
        arg.mark_samples(samples=self.sample_input_ids)
        arg.update_times()

        arg_ids = {k: arg.node_ids[self.ids[k]] for k in self.ids}
        self.assertEqual(arg.tables.nodes.num_rows, len(self.ids))
        self.assertEqual(arg.max_time, 5.0)
        for label in self.ids:
            node = arg_ids[label]
            self.assertEqual(arg.tables.nodes.time[node],
                             5.0 - self.true_times[self.ids[label]])
            # Only labels listed in self.sample_ids carry the sample flag.
            if label in self.sample_ids:
                expected_flags = msprime.NODE_IS_SAMPLE
            else:
                expected_flags = 0
            self.assertEqual(arg.tables.nodes.flags[node], expected_flags)

        tss = arg.tree_sequence(self.sample_input_ids)
        self.check_trees(tss, self.true_tss)
Esempio n. 6
0
 def test_update_times(self):
     """Check that update_times is order-independent and idempotent.

     Two recorders are built from the same initial state; one has
     update_times called before any records are added.  After identical
     additions and a final update_times, both must hold the same node
     times, a repeated update_times must not change them, and the times
     must equal the expected values.
     """
     def fresh_recorder():
         return ftprime.ARGrecorder(ts=self.init_ts,
                                    node_ids=self.init_map)

     # An early update_times call must not change the final outcome.
     early = fresh_recorder()
     early.update_times()
     late = fresh_recorder()

     for recorder in (early, late):
         recorder.add_individual(4, 2.0, population=2)
         recorder.add_individual(5, 2.0, population=2)
         recorder.add_record(0.0, 0.5, 0, (4, 5))
         recorder.add_record(0.5, 1.0, 0, (4, ))

     early.update_times()
     late.update_times()
     self.assertArrayEqual(early.nodes.time, late.nodes.time)

     # A second call on the same recorder must be a no-op.
     late.update_times()
     self.assertArrayEqual(early.nodes.time, late.nodes.time)

     # And the values themselves must be the expected ones.
     self.assertArrayEqual(early.nodes.time, [3, 2.2, 2, 0, 0])
Esempio n. 7
0
 def test_simplify(self):
     """Check that simplify() does not change the extracted tree sequence.

     The tree sequence obtained directly from tree_sequence([4, 5]) is
     compared, via ``self.check_trees``, with the one obtained after
     first calling simplify([4, 5]).
     """
     sample_ids = (4, 5)
     recorder = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
     for new_id in sample_ids:
         recorder.add_individual(new_id, 2.0, population=2)
     recorder.add_record(0.0, 0.5, 0, (4, 5))
     recorder.add_record(0.5, 1.0, 0, (4, ))
     print(recorder)

     # Sequence a: straight from the recorder.
     direct_ts = recorder.tree_sequence(list(sample_ids))
     print("---------------- sequence a -----------")
     print(direct_ts.dump_tables())

     # Sequence b: simplify first, then extract.
     recorder.simplify(list(sample_ids))
     simplified_ts = recorder.tree_sequence(list(sample_ids))
     print("---------------- sequence b -----------")
     print(simplified_ts.dump_tables())

     self.check_trees(direct_ts, simplified_ts)
Esempio n. 8
0
 def test_add_record(self):
     """Check that add_record appends edges without touching the nodes.

     Two individuals (4 and 5) are added, then two records with parent 0.
     The node count must stay at the initial count plus two, the edge
     table must hold 5 rows with the expected parent/child entries, and
     adding a record for the unknown parent 8 must raise ValueError.
     """
     recorder = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
     recorder.add_individual(4, 2.0, population=2)
     recorder.add_individual(5, 2.0, population=2)
     expected_nodes = self.init_ts.num_nodes + 2
     self.assertEqual(recorder.nodes.num_rows, expected_nodes)

     # Adding edges should not change the number of nodes.
     recorder.add_record(0.0, 0.5, 0, (4, 5))
     recorder.add_record(0.5, 1.0, 0, (4, ))
     self.assertEqual(recorder.nodes.num_rows, self.init_ts.num_nodes + 2)
     print(recorder)

     # Initial 2 edges + 3 added above.
     self.assertEqual(recorder.edges.num_rows, 5)
     self.assertEqual(recorder.edges.parent[2], recorder.node_ids[0])
     # (edge row, input ID of the expected child) for the three new edges.
     for row, child_input_id in ((2, 4), (3, 5), (4, 4)):
         self.assertEqual(recorder.edges.child[row],
                          recorder.node_ids[child_input_id])

     # A record whose parent does not exist must be rejected.
     with self.assertRaises(ValueError):
         recorder.add_record(0.0, 0.5, 8, (0, 1))
        if any(i < min_child_id or i >= max_child_id
               for i in edges_gen['child']) is True:
            raise RuntimeError("Bad child")
    assert (float(gen) == nodes['generation'].max())


if __name__ == "__main__":
    popsize = int(sys.argv[1])
    theta = float(sys.argv[2])
    nsam = int(sys.argv[3])  # sample size to take and add mutations to
    seed = int(sys.argv[4])

    np.random.seed(seed)

    tracker = MockAncestryTracker()
    args = ftprime.ARGrecorder(node_ids=enumerate(range(2 * popsize)),
                               ts=msprime.simulate(2 * popsize))
    samples = wf(popsize, tracker, 10 * popsize, args)

    args.simplify(samples=range(10 * popsize * 2 *
                                popsize, (10 * popsize + 1) * 2 * popsize))
    ts = args.tree_sequence()
    # for x in ts.dump_tables():
    #     print(x)
    MRCAS = [t.get_time(t.get_root()) for t in ts.trees()]
    print("ARGrecorder MRCAS:", MRCAS)

    # Check that our sample IDs are as expected:
    if __debug__:
        min_sample = 10 * popsize * 2 * popsize
        max_sample = 10 * popsize * 2 * popsize + 2 * popsize
        if any(i < min_sample or i >= max_sample for i in samples) is True:
def ind_to_time(k):
    """Return the time assigned to individual ID `k`.

    Computed as ``1 + generations - floor((k - 1) / N)`` from the
    module-level ``generations`` and ``N``.
    NOTE(review): presumably IDs are numbered in consecutive blocks of
    ``N`` per generation, starting at 1 -- confirm against the input data.
    """
    block_index = math.floor((k - 1) / N)
    return 1 + generations - block_index


def i2c(k, p):
    """Convert individual ID `k` (with label index `p`) to a chromosome ID.

    The value returned by ``ftprime.ind_to_chrom`` for ``k`` and
    ``ftprime.mapa_labels[p]`` is shifted by ``1 + 2 * nsamples``; the
    leading "1 +" makes room for the universal common ancestor that the
    script adds separately.
    """
    offset = 1 + 2 * nsamples
    return offset + ftprime.ind_to_chrom(k, ftprime.mapa_labels[p])


# Input is of this form:
# offspringID parentID startingPloidy rec1 rec2 ....
# ... coming in *pairs*

args = ftprime.ARGrecorder()

# The ancestor of everyone is labeled 2 * nsamples and placed
# `ancestor_age` before the first generation.
universal_ancestor = 2 * nsamples
ancestor_time = float(1 + generations + ancestor_age)
args.add_individual(name=universal_ancestor, time=ancestor_time)

# Initial generation: both chromosomes (p = 0, 1) of individuals 1..N are
# recorded as children of the universal ancestor over [0, length).
first_gen = sorted(i2c(k, p) for k in range(1, N + 1) for p in [0, 1])
args.add_record(0.0, length, universal_ancestor, tuple(first_gen))
for k in range(1, N + 1):
    for p in [0, 1]:
        args.add_individual(i2c(k, p), ind_to_time(k))

# Counters used below.
# NOTE(review): presumably for progress logging while reading input -- confirm.
nlines = 0
log_lines = 10000