def run_simulate(args):
    """
    Run one simulation configured from ``args`` and pass the resulting tree
    sequence to ``process_trees``.

    The simulator's text output is written to in-memory buffers, which are
    rewound and re-read with ``msprime.load_text``.
    """
    sample_size = args.sample_size
    num_loci = args.num_loci
    recombination_rate = args.recombination_rate
    num_populations = args.num_populations

    # The same migration rate between every pair of distinct populations,
    # and zero on the diagonal.
    migration_matrix = [
        [args.migration_rate * int(source != dest)
         for source in range(num_populations)]
        for dest in range(num_populations)
    ]

    # Defaults: all samples in population 0, growth rate 0 and size 1 for
    # every population.  Values supplied on ``args`` replace them wholesale.
    sample_configuration = [0] * num_populations
    population_growth_rates = [0] * num_populations
    population_sizes = [1] * num_populations
    sample_configuration[0] = sample_size
    if args.sample_configuration is not None:
        sample_configuration = args.sample_configuration
    if args.population_growth_rates is not None:
        population_growth_rates = args.population_growth_rates
    if args.population_sizes is not None:
        population_sizes = args.population_sizes

    random.seed(args.random_seed)
    simulator = Simulator(
        sample_size,
        num_loci,
        recombination_rate,
        migration_matrix,
        sample_configuration,
        population_growth_rates,
        population_sizes,
        args.population_growth_rate_change,
        args.population_size_change,
        args.migration_matrix_element_change,
        args.bottleneck,
        10000,  # NOTE(review): meaning of this constant is not visible here
    )
    simulator.simulate()

    # Round-trip through the text format: dump, rewind, load.
    nodes_file = StringIO()
    edgesets_file = StringIO()
    simulator.write_text(nodes_file, edgesets_file)
    for buffer in (nodes_file, edgesets_file):
        buffer.seek(0)
    process_trees(msprime.load_text(nodes_file, edgesets_file))
def test_node_times_stable(self):
    # Simplifying must not change the recorded times of the sample nodes.
    #
    # Start from a tree sequence containing only a, b and c.
    node_table = six.StringIO("""\
    id      is_sample   population      time
    0       0           -1              1.00000000000000
    1       1           -1              0.00000000000000
    2       1           -1              0.00000000000000
    """)
    edge_table = six.StringIO("""\
    id      left            right           parent  child
    0       0.00000000      1.00000000      0       1
    1       0.00000000      1.00000000      0       2
    """)
    init_ts = msprime.load_text(
        nodes=node_table, edges=edge_table, strict=False)
    first_gen = {
        self.ids[name]: node_id for node_id, name in enumerate('abc')}
    arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)

    # (left parent, right parent, breakpoint, child, birth time)
    births = [
        ('b', 'a', 0.9, 'd', 2.0),
        ('a', 'c', 0.1, 'e', 2.0),
        ('d', 'e', 0.7, 'f', 3.0),
        ('f', 'd', 0.8, 'g', 4.0),
        ('e', 'f', 0.2, 'h', 4.0),
        ('b', 'g', 0.6, 'i', 5.0),
        ('g', 'h', 0.5, 'j', 5.0),
        ('c', 'h', 0.4, 'k', 5.0),
    ]
    for lparent, rparent, breakpoint, child, btime in births:
        self.f(arg, lparent, rparent, breakpoint, child, btime)
    arg.update_times()

    times_before = {}
    for input_id in arg.node_ids:
        times_before[input_id] = arg.nodes.time[arg.node_ids[input_id]]
    print(arg)
    arg.simplify(self.sample_input_ids)
    print(arg)
    times_after = {}
    for input_id in arg.node_ids:
        times_after[input_id] = arg.nodes.time[arg.node_ids[input_id]]

    for sample in self.sample_input_ids:
        self.assertEqual(times_before[sample], times_after[sample])
def test_intermediate_simplify(self):
    # Simplify part-way through recording, carry on recording, and check
    # that the final simplified trees still match the known answer.
    #
    # Start from a tree sequence containing only a, b and c.
    node_table = six.StringIO("""\
    id      is_sample   population      time
    0       0           -1              1.00000000000000
    1       1           -1              0.00000000000000
    2       1           -1              0.00000000000000
    """)
    edge_table = six.StringIO("""\
    id      left            right           parent  children
    0       0.00000000      1.00000000      0       1,2
    """)
    init_ts = msprime.load_text(
        nodes=node_table, edges=edge_table, strict=False)
    first_gen = {
        self.ids[name]: node_id for node_id, name in enumerate('abc')}
    arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)

    # (left parent, right parent, breakpoint, child, birth time)
    before_simplify = [
        ('b', 'a', 0.9, 'd', 2.0),
        ('a', 'c', 0.1, 'e', 2.0),
        ('d', 'e', 0.7, 'f', 3.0),
        ('f', 'd', 0.8, 'g', 4.0),
    ]
    after_simplify = [
        ('e', 'f', 0.2, 'h', 4.0),
        ('b', 'g', 0.6, 'i', 5.0),
        ('g', 'h', 0.5, 'j', 5.0),
        ('c', 'h', 0.4, 'k', 5.0),
    ]

    for lparent, rparent, breakpoint, child, btime in before_simplify:
        self.f(arg, lparent, rparent, breakpoint, child, btime)

    # simplify
    print(arg)
    keep = [self.ids[name] for name in ['b', 'c', 'e', 'f', 'g']]
    arg.simplify(samples=keep)
    print(arg)

    for lparent, rparent, breakpoint, child, btime in after_simplify:
        self.f(arg, lparent, rparent, breakpoint, child, btime)
    print(arg)

    self.check_trees(
        arg.tree_sequence(self.sample_input_ids), self.true_tss)
def ts_txts_to_trees(ts_nodes, ts_edges, trees_outname=None):
    """
    Load a nodes/edges text pair, simplify it and optionally dump the result.

    :param ts_nodes: open file object for the nodes table; its ``.name`` is
        used for logging and for the diagnostic copy on failure.
    :param ts_edges: open file object for the edges table; ``.name`` is used
        in the same way.
    :param trees_outname: if truthy, path the simplified tree sequence is
        dumped to.
    :return: the simplified tree sequence.
    :raises Exception: whatever ``msprime.load_text`` or ``simplify`` raised,
        re-raised after the diagnostic files have been written.
    """
    import shutil
    import msprime
    logging.info("== Converting new ts ARG to .trees ===")
    try:
        ts = msprime.load_text(nodes=ts_nodes, edges=ts_edges)
    except Exception:
        # Catch Exception rather than everything so that Ctrl-C or
        # SystemExit does not trigger the diagnostic copies.
        logging.warning(
            "Can't load the texts file properly. Saved copied to 'bad.nodes' & 'bad.edges' for inspection"
        )
        shutil.copyfile(ts_nodes.name, "bad.nodes")
        shutil.copyfile(ts_edges.name, "bad.edges")
        raise
    logging.info("== loaded {}, {}===".format(ts_nodes.name, ts_edges.name))
    try:
        simple_ts = ts.simplify()
    except Exception:
        # Keep the unsimplified tree sequence for post-mortem inspection.
        ts.dump("bad.trees")
        logging.warning(
            "Can't simplify. .trees file dumped to 'bad.trees'")
        raise
    if trees_outname:
        simple_ts.dump(trees_outname)
    return simple_ts
def simple_ex(self):
    # Build the RecombCollector fixture: a single diploid individual whose
    # two haploid genomes are nodes 1 and 2 of the initial tree sequence.
    # Both the default and the 'binary' collector are constructed and
    # checked, but only the default one is returned.
    node_table = six.StringIO("""\
    id      is_sample   population      time
    0       0           0               1.00000000000000
    1       1           1               0.00000000000000
    2       1           2               0.00000000000000
    """)
    edge_table = six.StringIO("""\
    id      left            right           parent  child
    0       0.00000000      3.00000000      0       1
    1       0.00000000      3.00000000      0       2
    """)
    init_ts = msprime.load_text(
        nodes=node_table, edges=edge_table, strict=False)
    # diploid 0 maps initially to haploids 1 and 2 in init_ts:
    node_ids = {(0, 0): 1, (0, 1): 2}
    locus_position = [0.0, 1.0, 2.0, 3.0]
    shared_args = dict(
        ts=init_ts,
        node_ids=node_ids,
        locus_position=locus_position,
        benchmark=True,
    )

    rc = ftprime.RecombCollector(**shared_args)
    assert rc.mode == 'text'

    rc2 = ftprime.RecombCollector(mode='binary', **shared_args)
    assert rc2.mode == 'binary'

    return rc, node_ids
def main(ts, fastARG_executable, fa_in, fa_out, nodes_fh, edges_fh, sites_fh,
         muts_fh):
    """
    This is just to test if fastarg produces the same haplotypes

    Writes ``ts`` in fastARG input format to ``fa_in``, runs
    ``fastARG build`` on it with stdout sent to ``fa_out``, converts that
    output into the four text tables, reloads them and logs record counts
    for the original, the reloaded and the simplified tree sequences.

    :param ts: the tree sequence to round-trip through fastARG.
    :param fastARG_executable: path of the fastARG binary.
    :param fa_in: named file object receiving the fastARG input.
    :param fa_out: file object receiving fastARG's stdout.
    :param nodes_fh: file object for the generated nodes table.
    :param edges_fh: file object for the generated edges table.
    :param sites_fh: file object for the generated sites table.
    :param muts_fh: file object for the generated mutations table.
    :raises subprocess.CalledProcessError: if fastARG exits with a non-zero
        status.
    """
    import subprocess
    seq_len = ts.get_sequence_length()
    ts_to_fastARG_in(ts, fa_in)
    # check_call rather than call: a failed fastARG run must not be parsed
    # as though it had produced a valid ARG.
    subprocess.check_call(
        [fastARG_executable, 'build', fa_in.name], stdout=fa_out)
    fastARG_out_to_ts_txts(
        fa_out,
        variant_positions_from_fastARGin(fa_in),
        nodes_fh,
        edges_fh,
        sites_fh,
        muts_fh,
        seq_len=seq_len)

    new_ts = msprime.load_text(
        nodes=nodes_fh, edges=edges_fh, sites=sites_fh, mutations=muts_fh)
    simple_ts = new_ts.simplify()
    logging.debug(
        "Simplified num_records should always be < unsimplified num_records.\n"
        "For low mutationRate:recombinationRate ratio,"
        " the initial num records will probably be higher than the"
        " fastarg num_records, as the original simulation will have records"
        " which leave no mutational trace. As the mutation rate increases,"
        " we expect the fastarg num_records to equal, then exceed the original"
        " as fastarg starts inferring the wrong (less parsimonious) set of trees"
    )
    logging.debug(
        "Initial num records = {}, fastARG (simplified) = {}, fastARG (unsimplified) = {}"
        .format(ts.get_num_records(), simple_ts.get_num_records(),
                new_ts.get_num_records()))
def test_build_ts(self):
    # Record the whole explicit pedigree, then check node counts, times,
    # sample flags and the simplified trees against the known answer.
    #
    # Start from a tree sequence containing only a, b and c.
    node_table = six.StringIO("""\
    id      is_sample   population      time
    0       0           -1              1.00000000000000
    1       1           -1              0.00000000000000
    2       1           -1              0.00000000000000
    """)
    edge_table = six.StringIO("""\
    id      left            right           parent  child
    0       0.00000000      1.00000000      0       1
    1       0.00000000      1.00000000      0       2
    """)
    init_ts = msprime.load_text(
        nodes=node_table, edges=edge_table, strict=False)
    first_gen = {
        self.ids[name]: node_id for node_id, name in enumerate('abc')}
    arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)

    # Steps 1 and 2 of the scenario (individual `a` at t=0, then
    # `(a,?,1.0)->b` and `(a,?,1.0)->c` at t=1) are taken care of by
    # init_ts, so recording starts at step 3.
    births = [
        # 3. `(b,a,0.9)->d` and `(a,c,0.1)->e` and then `a` dies at `t=2`
        ('b', 'a', 0.9, 'd', 2.0),
        ('a', 'c', 0.1, 'e', 2.0),
        # 4. `(d,e,0.7)->f` at `t=3`
        ('d', 'e', 0.7, 'f', 3.0),
        # 5. `(f,d,0.8)->g` and `(e,f,0.2)->h` at `t=4`.
        ('f', 'd', 0.8, 'g', 4.0),
        ('e', 'f', 0.2, 'h', 4.0),
        # 6. `(b,g,0.6)->i` and `(g,h,0.5)->j` and `(c,h,0.4)->k` at `t=5`.
        ('b', 'g', 0.6, 'i', 5.0),
        ('g', 'h', 0.5, 'j', 5.0),
        ('c', 'h', 0.4, 'k', 5.0),
    ]
    for lparent, rparent, breakpoint, child, btime in births:
        self.f(arg, lparent, rparent, breakpoint, child, btime)

    # 7. We sample `i`, `j` and `k`.
    arg.mark_samples(samples=self.sample_input_ids)
    arg.update_times()

    arg_ids = {}
    for name in self.ids:
        arg_ids[name] = arg.node_ids[self.ids[name]]
    self.assertEqual(arg.tables.nodes.num_rows, len(self.ids))
    self.assertEqual(arg.max_time, 5.0)
    for name in self.ids:
        # Times are compared after subtracting the forward-time value
        # from the final time, 5.0.
        self.assertEqual(arg.tables.nodes.time[arg_ids[name]],
                         5.0 - self.true_times[self.ids[name]])
        expected_flags = (
            msprime.NODE_IS_SAMPLE if name in self.sample_ids else 0)
        self.assertEqual(
            arg.tables.nodes.flags[arg_ids[name]], expected_flags)

    self.check_trees(
        arg.tree_sequence(self.sample_input_ids), self.true_tss)
def test_stick_tree(self):
    # A chain 2 -> 1 -> 0 must render as a single vertical stick.
    node_table = six.StringIO("""\
    id  is_sample   time
    0   1           0
    1   1           1
    2   1           2
    """)
    edge_table = six.StringIO("""\
    left    right   parent  child
    0       1       1       0
    0       1       2       1
    """)
    expected = "2\n┃\n1\n┃\n0\n"
    ts = msprime.load_text(node_table, edge_table, strict=False)
    first_tree = next(ts.trees())
    self.verify_text_rendering(first_tree, expected)
def test_trident_tree(self):
    # A root (3) with three leaf children must render as a trident.
    nodes = six.StringIO("""\
    id  is_sample   time
    0   1           0
    1   1           0
    2   1           0
    3   1           2
    """)
    edges = six.StringIO("""\
    left    right   parent  child
    0       1       3       0
    0       1       3       1
    0       1       3       2
    """)
    # The root label sits above the central junction of the five-column
    # branch row, so the top row is padded to the same width.
    tree = (
        "  3  \n"
        "┏━╋━┓\n"
        "0 1 2\n")
    ts = msprime.load_text(nodes, edges, strict=False)
    t = next(ts.trees())
    self.verify_text_rendering(t, tree)
def fastARG_out_to_ts(fastARG_out_filehandle, variant_positions, seq_len=None):
    """
    The same as fastARG_out_to_ts_txts, but use temporary files and return
    a simplified ts.
    """
    def scratch_file():
        # Read/write text file, removed when its context exits.
        return tempfile.NamedTemporaryFile("w+")

    with scratch_file() as nodes, \
            scratch_file() as edges, \
            scratch_file() as sites, \
            scratch_file() as mutations:
        fastARG_out_to_ts_txts(
            fastARG_out_filehandle,
            variant_positions,
            nodes,
            edges,
            sites,
            mutations,
            seq_len=seq_len)
        loaded = msprime.load_text(
            nodes=nodes, edges=edges, sites=sites, mutations=mutations)
        return loaded.simplify()
def test_simple_tree(self):
    # A root (2) with two leaf children, drawn in the unicode format.
    node_table = six.StringIO("""\
    id  is_sample   time
    0   1           0
    1   1           0
    2   1           2
    """)
    edge_table = six.StringIO("""\
    left    right   parent  child
    0       1       2       0
    0       1       2       1
    """)
    expected = " 2 \n┏┻┓\n0 1"
    ts = msprime.load_text(node_table, edge_table, strict=False)
    first_tree = next(ts.trees())
    self.verify_text_rendering(first_tree.draw(format="unicode"), expected)
def test_pitchfork_tree(self):
    # A root (4) with four leaf children must render as a pitchfork.
    nodes = six.StringIO("""\
    id  is_sample   time
    0   1           0
    1   1           0
    2   1           0
    3   1           0
    4   1           2
    """)
    edges = six.StringIO("""\
    left    right   parent  child
    0       1       4       0
    0       1       4       1
    0       1       4       2
    0       1       4       3
    """)
    # The root label sits above the central junction of the seven-column
    # branch row, so the top row is padded to the same width.
    tree = (
        "   4   \n"
        "┏━┳┻┳━┓\n"
        "0 1 2 3\n")
    ts = msprime.load_text(nodes, edges, strict=False)
    t = next(ts.trees())
    self.verify_text_rendering(t, tree)
def test_odd_num_children_tree(self):
    # A root (5) with five children: checks that an odd number of
    # children can be drawn in the format under test.
    node_table = six.StringIO("""\
    id  is_sample   time
    0   1           0
    1   1           1
    2   1           2
    3   1           1
    4   1           4
    5   1           5
    """)
    edge_table = six.StringIO("""\
    left    right   parent  child
    0       1       5       0
    0       1       5       1
    0       1       5       2
    0       1       5       3
    0       1       5       4
    """)
    ts = msprime.load_text(node_table, edge_table, strict=False)
    first_tree = next(ts.trees())
    self.verify_basic_text(first_tree.draw(format=self.drawing_format))
def test_pitchfork_tree(self):
    # A root (4) with four leaf children, drawn with custom node labels:
    # unlabelled nodes are rendered as a plain vertical bar.
    nodes = six.StringIO("""\
    id  is_sample   time
    0   1           0
    1   1           0
    2   1           0
    3   1           0
    4   1           2
    """)
    edges = six.StringIO("""\
    left    right   parent  child
    0       1       4       0
    0       1       4       1
    0       1       4       2
    0       1       4       3
    """)
    ts = msprime.load_text(nodes, edges, strict=False)
    t = next(ts.trees())

    # No labels.  The root glyph sits above the central junction of the
    # seven-column branch row, so the top row is padded to that width.
    tree = (
        "   ┃   \n"
        "┏━┳┻┳━┓\n"
        "┃ ┃ ┃ ┃\n")
    drawn = t.draw(format="unicode", node_labels={})
    self.verify_text_rendering(drawn, tree)

    # Some labels
    tree = (
        "   ┃   \n"
        "┏━┳┻┳━┓\n"
        "0 ┃ ┃ 3\n")
    drawn = t.draw(format="unicode", node_labels={0: "0", 3: "3"})
    self.verify_text_rendering(drawn, tree)
def test_case_2(self):
    # Here are the trees (the first column is the full ARG):
    # t           |           |           |           |
    #
    # 0   --3--   |   --3--   |   --3--   |   --3--   |   --3--
    #    /  |  \  |  /  |  \  |  /     \  |  /     \  |  /     \
    # 1 4   |   5 | 4   |   5 | 4       5 | 4       5 | 4       5
    #   |\ / \ /| | |\   \    | |\     /  | |\     /  | |\     /|
    # 2 | 6   7 | | | 6   7   | | 6   7   | | 6   7   | | 6   7 |
    #   | |\ /| | | |  \  |   | |  \  |   | |  \      | |  \    | ...
    # 3 | | 8 | | | |   8 |   | |   8 |   | |   8     | |   8   |
    #   | |/ \| | | |  /  |   | |  /  |   | |  / \    | |  / \  |
    # 4 | 9  10 | | | 9  10   | | 9  10   | | 9  10   | | 9  10 |
    #   |/ \ / \| | |  \   \  | |  \   \  | |  \   \  | |  \    |
    # 5 0   1   2 | 0   1   2 | 0   1   2 | 0   1   2 | 0   1   2
    #
    #             | 0.0 - 0.1 | 0.1 - 0.2 | 0.2 - 0.4 | 0.4 - 0.5
    #
    # ... continued:
    # t               |           |           |           |
    #
    # 0       --3--   |   --3--   |   --3--   |   --3--   |   --3--
    #        /     \  |  /     \  |  /     \  |  /     \  |  /  |  \
    # 1     4       5 | 4       5 | 4       5 | 4       5 | 4   |   5
    #       |\     /| |  \     /| |  \     /| |  \     /| |    /   /|
    # 2     | 6   7 | |   6   7 | |   6   7 | |   6   7 | |   6   7 |
    #       |  \    | |    \    | |      /  | |   |  /  | |   |  /  |
    # 3 ... |   8   | |     8   | |     8   | |   | 8   | |   | 8   |
    #       |  / \  | |    / \  | |    / \  | |   |  \  | |   |  \  |
    # 4     | 9  10 | |   9  10 | |   9  10 | |   9  10 | |   9  10 |
    #       |    /  | |  /   /  | |  /   /  | |  /   /  | |  /   /  |
    # 5     0   1   2 | 0   1   2 | 0   1   2 | 0   1   2 | 0   1   2
    #
    #       0.5 - 0.6 | 0.6 - 0.7 | 0.7 - 0.8 | 0.8 - 0.9 | 0.9 - 1.0

    # Expected values, computed by hand from the trees above.
    # divergence between 0 and 1
    true_diversity_01 = 2 * (0.6 * 4 + 0.2 * 2 + 0.2 * 5)
    # divergence between 1 and 2
    true_diversity_12 = 2 * (0.2 * 5 + 0.2 * 2 + 0.3 * 5 + 0.3 * 4)
    # divergence between 0 and 2
    true_diversity_02 = 2 * (0.2 * 5 + 0.2 * 4 + 0.3 * 5 + 0.1 * 4 + 0.2 * 5)
    # mean divergence between 0, 1 and 0, 2
    true_mean_diversity = (0 + true_diversity_02 + true_diversity_01 +
                           true_diversity_12) / 4
    # Y(0;1, 2)
    true_Y = 0.2 * 4 + 0.2 * (4 + 2) + 0.2 * 4 + 0.2 * 2 + 0.2 * (5 + 1)

    node_table = six.StringIO("""\
    is_sample       time            population
    1               0.000000        0
    1               0.000000        0
    1               0.000000        0
    0               5.000000        0
    0               4.000000        0
    0               4.000000        0
    0               3.000000        0
    0               3.000000        0
    0               2.000000        0
    0               1.000000        0
    0               1.000000        0
    """)
    edgeset_table = six.StringIO("""\
    left            right           parent  children
    0.500000        1.000000        10      1
    0.000000        0.400000        10      2
    0.600000        1.000000        9       0
    0.000000        0.500000        9       1
    0.800000        1.000000        8       10
    0.200000        0.800000        8       9,10
    0.000000        0.200000        8       9
    0.700000        1.000000        7       8
    0.000000        0.200000        7       10
    0.800000        1.000000        6       9
    0.000000        0.700000        6       8
    0.400000        1.000000        5       2,7
    0.100000        0.400000        5       7
    0.600000        0.900000        4       6
    0.000000        0.600000        4       0,6
    0.900000        1.000000        3       4,5,6
    0.100000        0.900000        3       4,5
    0.000000        0.100000        3       4,5,7
    """)
    ts = msprime.load_text(nodes=node_table, edgesets=edgeset_table)

    # Generic consistency checks first.
    self.check_pairwise_diversity(ts)
    self.check_pairwise_diversity_mutations(ts)
    self.check_Y_stat(ts)
    self.check_vectorization(ts)

    # divergence between 0 and 1
    pair_sets = [[0], [1]]

    def separates_pair(x):
        return (x[0] > 0) != (x[1] > 0)

    # branch lengths:
    self.assertAlmostEqual(
        branch_length_diversity(ts, [0], [1]), true_diversity_01)
    self.assertAlmostEqual(
        ts.branch_stats(pair_sets, separates_pair), true_diversity_01)
    self.assertAlmostEqual(
        branch_stats_node_iter(ts, pair_sets, separates_pair),
        true_diversity_01)

    # mean divergence between 0, 1 and 0, 2
    mean_sets = [[0, 1], [0, 2]]
    set_sizes = [len(members) for members in mean_sets]

    def mean_divergence_weight(x):
        return float(x[0] * (set_sizes[1] - x[1]) +
                     (set_sizes[0] - x[0]) * x[1]) / 4.0

    # branch lengths:
    self.assertAlmostEqual(
        branch_length_diversity(ts, mean_sets[0], mean_sets[1]),
        true_mean_diversity)
    self.assertAlmostEqual(
        ts.branch_stats(mean_sets, mean_divergence_weight),
        true_mean_diversity)
    self.assertAlmostEqual(
        branch_stats_node_iter(ts, mean_sets, mean_divergence_weight),
        true_mean_diversity)

    # Y-statistic for (0/12)
    y_sets = [[0], [1, 2]]

    def y_weight(x):
        return ((x[0] == 1) and (x[1] == 0)) or ((x[0] == 0) and (x[1] == 2))

    # branch lengths:
    self.assertAlmostEqual(branch_length_Y(ts, 0, 1, 2), true_Y)
    self.assertAlmostEqual(ts.branch_stats(y_sets, y_weight), true_Y)
    self.assertAlmostEqual(
        branch_stats_node_iter(ts, y_sets, y_weight), true_Y)
class BasicTestCase(FtprimeTestCase):
    """
    Test basic operations.
    """
    # Shared fixture, built once at class-creation time: node 0 is the
    # parent of the two sample nodes 1 and 2 over the whole of [0, 1).
    nodes = six.StringIO("""\
    id      is_sample   population      time
    0       0           -1              1.00000000000000
    1       1           -1              0.20000000000000
    2       1           -1              0.00000000000000
    """)
    edges = six.StringIO("""\
    id      left            right           parent  child
    0       0.00000000      1.00000000      0       1
    1       0.00000000      1.00000000      0       2
    """)
    init_ts = msprime.load_text(nodes=nodes, edges=edges, strict=False)
    # input (individual) id -> node id in init_ts
    init_map = {0: 1, 1: 2}

    def test_init(self):
        # A fresh recorder mirrors the initial tree sequence.
        recorder = ftprime.ARGrecorder(
            ts=self.init_ts, node_ids=self.init_map)
        for input_id, node_id in self.init_map.items():
            self.assertEqual(recorder.tables.nodes.time[node_id],
                             self.init_ts.node(node_id).time)
            self.assertEqual(recorder.node_ids[input_id], node_id)
        self.assertEqual(
            recorder.tables.edges.num_rows, self.init_ts.num_edges)

    def test_add_individual(self):
        recorder = ftprime.ARGrecorder(
            ts=self.init_ts, node_ids=self.init_map)
        recorder.add_individual(5, 2.0, population=2)
        node_table = recorder.tables.nodes
        self.assertEqual(node_table.num_rows, self.init_ts.num_nodes + 1)
        self.assertEqual(node_table.num_rows, 4)
        new_node = recorder.node_ids[5]
        self.assertEqual(recorder.tables.nodes.time[new_node], 2.0)
        self.assertEqual(recorder.tables.nodes.population[new_node], 2)
        # input id 1 is already in use, so adding it again must fail
        self.assertRaises(ValueError, recorder.add_individual, 1, 1.5)

    def test_add_record(self):
        recorder = ftprime.ARGrecorder(
            ts=self.init_ts, node_ids=self.init_map)
        for new_id in (4, 5):
            recorder.add_individual(new_id, 2.0, population=2)
        expected_nodes = self.init_ts.num_nodes + 2
        # adding edges should not change number of nodes
        self.assertEqual(recorder.tables.nodes.num_rows, expected_nodes)
        recorder.add_record(0.0, 0.5, 0, (4, 5))
        recorder.add_record(0.5, 1.0, 0, (4, ))
        self.assertEqual(recorder.tables.nodes.num_rows, expected_nodes)
        print(recorder)
        # initial 2 + 3 added above
        self.assertEqual(recorder.tables.edges.num_rows, 5)
        self.assertEqual(
            recorder.tables.edges.parent[2], recorder.node_ids[0])
        # rows 2..4 hold the new edges, with children 4, 5 and 4 in turn
        for row, child in ((2, 4), (3, 5), (4, 4)):
            self.assertEqual(
                recorder.tables.edges.child[row], recorder.node_ids[child])
        # try adding record with parent who doesn't exist
        self.assertRaises(
            ValueError, recorder.add_record, 0.0, 0.5, 8, (0, 1))

    def test_update_times(self):
        eager = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
        # check doing update_times along the way doesn't change things
        eager.update_times()
        lazy = ftprime.ARGrecorder(ts=self.init_ts, node_ids=self.init_map)
        for recorder in (eager, lazy):
            recorder.add_individual(4, 2.0, population=2)
            recorder.add_individual(5, 2.0, population=2)
            recorder.add_record(0.0, 0.5, 0, (4, 5))
            recorder.add_record(0.5, 1.0, 0, (4, ))
        eager.update_times()
        lazy.update_times()
        self.assertArrayEqual(
            eager.tables.nodes.time, lazy.tables.nodes.time)
        # check update_times is idempotent
        lazy.update_times()
        self.assertArrayEqual(
            eager.tables.nodes.time, lazy.tables.nodes.time)
        # and check is right answer
        self.assertArrayEqual(eager.tables.nodes.time, [3, 2.2, 2, 0, 0])

    def test_simplify(self):
        # test that we get the same tree sequence by doing tree_sequence
        # and simplify -> tree_sequence
        samples = [4, 5]
        recorder = ftprime.ARGrecorder(
            ts=self.init_ts, node_ids=self.init_map)
        recorder.add_individual(4, 2.0, population=2)
        recorder.add_individual(5, 2.0, population=2)
        recorder.add_record(0.0, 0.5, 0, (4, 5))
        recorder.add_record(0.5, 1.0, 0, (4, ))
        print(recorder)
        tsa = recorder.tree_sequence([4, 5])
        print("---------------- sequence a -----------")
        print(tsa.dump_tables())
        recorder.simplify(samples)
        tsb = recorder.tree_sequence([4, 5])
        print("---------------- sequence b -----------")
        print(tsb.dump_tables())
        self.check_trees(tsa, tsb)

    def test_simplify2(self):
        # test that nonsensical sequence_length gets caught
        self.assertRaises(
            ValueError,
            ftprime.ARGrecorder,
            ts=self.init_ts,
            node_ids=self.init_map,
            sequence_length=0.5)
# coding: utf-8
# Compare msprime's own simplification of the full example tables with the
# hand-simplified tables stored in edges.txt / nodes.txt.
import msprime

# Context managers close the table files once they have been parsed.
with open('edges_full.txt') as edges_file, \
        open('nodes_full.txt') as nodes_file:
    ts = msprime.load_text(edges=edges_file, nodes=nodes_file)

# Node ids 0..10 of the full tables are drawn as the letters A..K.
full_node_map = {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G',
                 7: 'H', 8: 'I', 9: 'J', 10: 'K'}

print("Full trees")
for t in ts.trees():
    print(t.draw(format='unicode', node_label_text=full_node_map))

print("Simplifed trees with to J (9), K (10) with `ts.simplify()`)")
tss = ts.simplify(samples=[9, 10])
for t in tss.trees():
    print(t.draw(format='unicode'))

print("Simplifed trees from tables in fig 5C",
      "with J (0) and K(1) marked as samples.")
with open('edges.txt') as edges_file, open('nodes.txt') as nodes_file:
    tss2 = msprime.load_text(edges=edges_file, nodes=nodes_file)
for t in tss2.trees():
    print(t.draw(format='unicode'))

print("Raw tables from `ts.simplify()`:")
print(tss.dump_tables())
print("\n\n\n...and from the trees loaded from text tables:")
print(tss2.dump_tables())
# Load the toy node/edge tables, print every marginal tree and show the
# image produced by VisualizeNodes.
import msprime
from VisualizeTrees import *

# Context managers close the table files once they have been parsed.
with open("toyNodeTable.txt", "r") as nodes, \
        open("toyEdgeTable.txt", "r") as edges:
    ts = msprime.load_text(nodes=nodes, edges=edges)

img = VisualizeNodes(ts, rescaled_time=False)
for t in ts.trees():
    print(t.draw(format="unicode"))
img.show()
class ExplicitTestCase(FtprimeTestCase):
    r"""
    An explicit test case.

    With `(i,j,x)->k` denoting that individual `k` inherits from `i` on
    `[0,x)` and from `j` on `[x,1)`:

    1. Begin with an individual `a` (and another anonymous one) at `t=0`.
    2. `(a,?,1.0)->b` and `(a,?,1.0)->c` at `t=1`
    3. `(b,a,0.9)->d` and `(a,c,0.1)->e` and then `a` dies at `t=2`
    4. `(d,e,0.7)->f` at `t=3`
    5. `(f,d,0.8)->g` and `(e,f,0.2)->h` at `t=4`.
    6. `(b,g,0.6)->i` and `(g,h,0.5)->j` and `(c,h,0.4)->k` at `t=5`.
    7. We sample `i`, `j` and `k`.

    Here are the trees:
    ```
    t           |           |           |           |           |           |           |           |           |

    0   --a--   |   --a--   |   --a--   |   --a--   |   --a--   |   --a--   |   --a--   |   --a--   |   --a--   |   --a--
       /  |  \  |  /  |  \  |  /     \  |  /     \  |  /     \  |  /     \  |  /     \  |  /     \  |  /     \  |  /  |  \
    1 b   |   c | b   |   c | b       c | b       c | b       c | b       c | b       c | b       c | b       c | b   |   c
      |\ / \ /| | |\   \  | | |\     /| | |\     /| | |\     /  | |\     /  |  \     /  |  \     /  |  \     /  |    /   /
    2 | d   e | | | d   e | | | d   e | | | d   e | | | d   e   | | d   e   |   d   e   |   d   e   |   d   e   |   d   e
      | |\ /| | | |  \  | | | |  \  | | | |  \    | | |  \      | |  \      |    \      |      /    |   |  /    |   |  /
    3 | | f | | | |   f | | | |   f | | | |   f   | | |   f     | |   f     |     f     |     f     |   | f     |   | f
      | |/ \| | | |  /  | | | |  /  | | | |  / \  | | |  / \    | |  / \    |    / \    |    / \    |   |  \    |   |  \
    4 | g   h | | | g   h | | | g   h | | | g   h | | | g   h   | | g   h   |   g   h   |   g   h   |   g   h   |   g   h
      |/ \ / \| | |  \    | | |  \    | | |  \    | | |  \   \  | |    / \  |  /   / \  |  /   / \  |  /   / \  |  /   / \
    5 i   j   k | i   j   k | i   j   k | i   j   k | i   j   k | i   j   k | i   j   k | i   j   k | i   j   k | i   j   k

                | 0.0 - 0.1 | 0.1 - 0.2 | 0.2 - 0.4 | 0.4 - 0.5 | 0.5 - 0.6 | 0.6 - 0.7 | 0.7 - 0.8 | 0.8 - 0.9 | 0.9 - 1.0
    ```

    and a labeling of the lineages
    ```
    t           |           |           |           |           |           |           |           |           |

    0   --a--   |   --a--   |   --a--   |   --a--   |   --a--   |   --a--   |   --a--   |   --a--   |   --a--   |   --a--
       /  |  \  |  /  |  \  |  /     \  |  /     \  |  /     \  |  /     \  |  /     \  |  /     \  |  /     \  |  /  |  \
    1 b   |   c | b   |   c | b       c | b       c | b       c | b       c | b       c | b       c | b       c | b   |   c
      |\ / \ /| | |\   \  | | |\     /| | |\     /| | |\     /  | |\     /  |  \     /  |  \     /  |  \     /  |    /   /
    2 | d   e | | | d   e | | | d   e | | | d   e | | | d   e   | | d   e   |   d   e   |   d   e   |   d   e   |   d   e
      | |\ /| | | |  \  | | | |  \  | | | |  \    | | |  \      | |  \      |    \      |      /    |   |  /    |   |  /
    3 | | f | | | |   f | | | |   f | | | |   f   | | |   f     | |   f     |     f     |     f     |   | f     |   | f
      | |/ \| | | |  /  | | | |  /  | | | |  / \  | | |  / \    | |  / \    |    / \    |    / \    |   |  \    |   |  \
    4 | g   h | | | g   h | | | g   h | | | g   h | | | g   h   | | g   h   |   g   h   |   g   h   |   g   h   |   g   h
      |/ \ / \| | |  \    | | |  \    | | |  \    | | |  \   \  | |    / \  |  /   / \  |  /   / \  |  /   / \  |  /   / \
    5 i   j   k | i   j   k | i   j   k | i   j   k | i   j   k | i   j   k | i   j   k | i   j   k | i   j   k | i   j   k

                | 0.0 - 0.1 | 0.1 - 0.2 | 0.2 - 0.4 | 0.4 - 0.5 | 0.5 - 0.6 | 0.6 - 0.7 | 0.7 - 0.8 | 0.8 - 0.9 | 0.9 - 1.0
    ```
    """

    def f(self, arg, lparent, rparent, breakpoint, child, btime):
        # Record the birth of `child` at time `btime`: it inherits
        # [0, breakpoint) from `lparent` and [breakpoint, 1) from `rparent`.
        # An empty side (breakpoint 0.0 or 1.0) adds no record.
        arg.add_individual(self.ids[child], btime)
        if breakpoint > 0.0:
            arg.add_record(0.0, breakpoint, self.ids[lparent],
                           (self.ids[child], ))
        if breakpoint < 1.0:
            arg.add_record(breakpoint, 1.0, self.ids[rparent],
                           (self.ids[child], ))

    # the correct tree sequence, unsimplified
    nodes = six.StringIO("""\
    id      is_sample   population      time
    0       0           -1              5.00000000000000      # a
    1       0           -1              4.00000000000000      # b
    2       0           -1              4.00000000000000      # c
    3       0           -1              3.00000000000000      # d
    4       0           -1              3.00000000000000      # e
    5       0           -1              2.00000000000000      # f
    6       0           -1              1.00000000000000      # g
    7       0           -1              1.00000000000000      # h
    8       1           -1              0.00000000000000      # i
    9       1           -1              0.00000000000000      # j
    10      1           -1              0.00000000000000      # k
    """)
    edges = six.StringIO("""\
    id      left            right           parent  child
    0       0.40000000      0.50000000      7       10
    0       0.50000000      1.00000000      7       9
    0       0.50000000      1.00000000      7       10
    0       0.00000000      0.50000000      6       9
    0       0.60000000      1.00000000      6       8
    0       0.00000000      0.20000000      5       6
    0       0.20000000      0.80000000      5       6
    0       0.20000000      0.80000000      5       7
    0       0.80000000      1.00000000      5       7
    0       0.00000000      0.20000000      4       7
    0       0.70000000      1.00000000      4       5
    0       0.00000000      0.70000000      3       5
    0       0.80000000      1.00000000      3       6
    0       0.00000000      0.10000000      2       10
    0       0.10000000      0.40000000      2       4
    0       0.10000000      0.40000000      2       10
    0       0.40000000      1.00000000      2       4
    0       0.00000000      0.60000000      1       3
    0       0.00000000      0.60000000      1       8
    0       0.60000000      0.90000000      1       3
    0       0.00000000      0.10000000      0       1
    0       0.00000000      0.10000000      0       2
    0       0.00000000      0.10000000      0       4
    0       0.10000000      0.90000000      0       1
    0       0.10000000      0.90000000      0       2
    0       0.90000000      1.00000000      0       1
    0       0.90000000      1.00000000      0       2
    0       0.90000000      1.00000000      0       3
    """)
    true_ts = msprime.load_text(nodes=nodes, edges=edges, strict=False)
    true_tss = true_ts.simplify()

    # individual name -> input id (position in the alphabet)
    ids = {
        name: index for index, name in enumerate(
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k'])
    }
    # forward-time birth time of each individual, indexed by input id
    true_times = [0, 1, 1, 2, 2, 3, 4, 4, 5, 5, 5]
    sample_ids = ('i', 'j', 'k')
    sample_input_ids = [8, 9, 10]

    def test_build_ts(self):
        # build initial tree sequence with just a, b, c
        nodes = six.StringIO("""\
        id      is_sample   population      time
        0       0           -1              1.00000000000000
        1       1           -1              0.00000000000000
        2       1           -1              0.00000000000000
        """)
        edges = six.StringIO("""\
        id      left            right           parent  child
        0       0.00000000      1.00000000      0       1
        1       0.00000000      1.00000000      0       2
        """)
        init_ts = msprime.load_text(nodes=nodes, edges=edges, strict=False)
        first_gen = {
            self.ids[k]: v for k, v in [('a', 0), ('b', 1), ('c', 2)]}
        arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)
        # 1. Begin with an individual `a` (and another anonymous one) at `t=0`.
        # taken care of in init_ts
        # arg.add_individual(self.ids['a'], 0.0)
        # # 2. `(a,?,1.0)->b` and `(a,?,1.0)->c` at `t=1`
        # self.f(arg, 'a', 'z', 1.0, 'b', 1.0)
        # self.f(arg, 'a', 'z', 1.0, 'c', 1.0)
        # 3. `(b,a,0.9)->d` and `(a,c,0.1)->e` and then `a` dies at `t=2`
        self.f(arg, 'b', 'a', 0.9, 'd', 2.0)
        self.f(arg, 'a', 'c', 0.1, 'e', 2.0)
        # 4. `(d,e,0.7)->f` at `t=3`
        self.f(arg, 'd', 'e', 0.7, 'f', 3.0)
        # 5. `(f,d,0.8)->g` and `(e,f,0.2)->h` at `t=4`.
        self.f(arg, 'f', 'd', 0.8, 'g', 4.0)
        self.f(arg, 'e', 'f', 0.2, 'h', 4.0)
        # 6. `(b,g,0.6)->i` and `(g,h,0.5)->j` and `(c,h,0.4)->k` at `t=5`.
        self.f(arg, 'b', 'g', 0.6, 'i', 5.0)
        self.f(arg, 'g', 'h', 0.5, 'j', 5.0)
        self.f(arg, 'c', 'h', 0.4, 'k', 5.0)
        # 7. We sample `i`, `j` and `k`.
        arg.mark_samples(samples=self.sample_input_ids)
        arg.update_times()
        arg_ids = {k: arg.node_ids[self.ids[k]] for k in self.ids}
        self.assertEqual(arg.tables.nodes.num_rows, len(self.ids))
        self.assertEqual(arg.max_time, 5.0)
        for x in self.ids:
            # Times are compared after subtracting the forward-time value
            # from the final time, 5.0.
            self.assertEqual(arg.tables.nodes.time[arg_ids[x]],
                             5.0 - self.true_times[self.ids[x]])
            if x in self.sample_ids:
                self.assertEqual(arg.tables.nodes.flags[arg_ids[x]],
                                 msprime.NODE_IS_SAMPLE)
            else:
                self.assertEqual(arg.tables.nodes.flags[arg_ids[x]], 0)
        tss = arg.tree_sequence(self.sample_input_ids)
        self.check_trees(tss, self.true_tss)

    def test_node_times_stable(self):
        # build initial tree sequence with just a, b, c
        nodes = six.StringIO("""\
        id      is_sample   population      time
        0       0           -1              1.00000000000000
        1       1           -1              0.00000000000000
        2       1           -1              0.00000000000000
        """)
        edges = six.StringIO("""\
        id      left            right           parent  child
        0       0.00000000      1.00000000      0       1
        1       0.00000000      1.00000000      0       2
        """)
        init_ts = msprime.load_text(nodes=nodes, edges=edges, strict=False)
        first_gen = {
            self.ids[k]: v for k, v in [('a', 0), ('b', 1), ('c', 2)]}
        arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)
        self.f(arg, 'b', 'a', 0.9, 'd', 2.0)
        self.f(arg, 'a', 'c', 0.1, 'e', 2.0)
        self.f(arg, 'd', 'e', 0.7, 'f', 3.0)
        self.f(arg, 'f', 'd', 0.8, 'g', 4.0)
        self.f(arg, 'e', 'f', 0.2, 'h', 4.0)
        self.f(arg, 'b', 'g', 0.6, 'i', 5.0)
        self.f(arg, 'g', 'h', 0.5, 'j', 5.0)
        self.f(arg, 'c', 'h', 0.4, 'k', 5.0)
        arg.update_times()
        # Snapshot every node time before and after simplifying; the
        # sample nodes must keep the same time.
        node_times = {
            u: arg.tables.nodes.time[arg.node_ids[u]]
            for u in arg.node_ids
        }
        print(arg)
        arg.simplify(self.sample_input_ids)
        print(arg)
        new_node_times = {
            u: arg.tables.nodes.time[arg.node_ids[u]]
            for u in arg.node_ids
        }
        for u in self.sample_input_ids:
            self.assertEqual(node_times[u], new_node_times[u])

    # unittest.skip is a decorator factory: applied bare it only skips on
    # Python 3.8+, so it is called with an explicit reason here.
    @unittest.skip("skipped in the original source; no reason was recorded")
    def test_intermediate_simplify(self):
        # build initial tree sequence with just a, b, c
        nodes = six.StringIO("""\
        id      is_sample   population      time
        0       0           -1              1.00000000000000
        1       1           -1              0.00000000000000
        2       1           -1              0.00000000000000
        """)
        edges = six.StringIO("""\
        id      left            right           parent  children
        0       0.00000000      1.00000000      0       1,2
        """)
        init_ts = msprime.load_text(nodes=nodes, edges=edges, strict=False)
        first_gen = {
            self.ids[k]: v for k, v in [('a', 0), ('b', 1), ('c', 2)]}
        arg = ftprime.ARGrecorder(ts=init_ts, node_ids=first_gen, time=1.0)
        self.f(arg, 'b', 'a', 0.9, 'd', 2.0)
        self.f(arg, 'a', 'c', 0.1, 'e', 2.0)
        self.f(arg, 'd', 'e', 0.7, 'f', 3.0)
        self.f(arg, 'f', 'd', 0.8, 'g', 4.0)
        # simplify
        print(arg)
        arg.simplify(
            samples=[self.ids[u] for u in ['b', 'c', 'e', 'f', 'g']])
        print(arg)
        self.f(arg, 'e', 'f', 0.2, 'h', 4.0)
        self.f(arg, 'b', 'g', 0.6, 'i', 5.0)
        self.f(arg, 'g', 'h', 0.5, 'j', 5.0)
        self.f(arg, 'c', 'h', 0.4, 'k', 5.0)
        print(arg)
        tss = arg.tree_sequence(self.sample_input_ids)
        self.check_trees(tss, self.true_tss)
def test_case_1(self):
    # With mutations:
    #
    # 1.0          6
    # 0.7         / \                                    5
    #            /   X                                  / \
    # 0.5       X     4                4               /   4
    #          /     / \              / \             /   X X
    # 0.4     X     X   \            X   3           X   /   \
    #        /     /     X          /   / X         /   /     \
    # 0.0   0     1       2        1   0   2       0   1       2
    #          (0.0, 0.2),        (0.2, 0.8),       (0.8, 1.0)
    #
    # Expected values, computed by hand from the three trees above.
    true_diversity_01 = 2 * (1 * (0.2 - 0) + 0.5 * (0.8 - 0.2) + 0.7 *
                             (1.0 - 0.8))
    true_diversity_02 = 2 * (1 * (0.2 - 0) + 0.4 * (0.8 - 0.2) + 0.7 *
                             (1.0 - 0.8))
    true_diversity_12 = 2 * (0.5 * (0.2 - 0) + 0.5 * (0.8 - 0.2) + 0.5 *
                             (1.0 - 0.8))
    true_mean_diversity = (0 + true_diversity_02 + true_diversity_01 +
                           true_diversity_12) / 4
    true_Y = 0.2 * (1 + 0.5) + 0.6 * (0.4) + 0.2 * (0.7 + 0.2)

    node_table = six.StringIO("""\
    id      is_sample   time
    0       1           0
    1       1           0
    2       1           0
    3       0           0.4
    4       0           0.5
    5       0           0.7
    6       0           1.0
    """)
    edgeset_table = six.StringIO("""\
    left    right   parent  children
    0.2     0.8     3       0,2
    0.0     0.2     4       1,2
    0.2     0.8     4       1,3
    0.8     1.0     4       1,2
    0.8     1.0     5       0,4
    0.0     0.2     6       0,4
    """)
    site_table = six.StringIO("""\
    id  position    ancestral_state
    0   0.05        0
    1   0.1         0
    2   0.11        0
    3   0.15        0
    4   0.151       0
    5   0.3         0
    6   0.6         0
    7   0.9         0
    8   0.95        0
    9   0.951       0
    """)
    mutation_table = six.StringIO("""\
    site    node    derived_state
    0       4       1
    1       0       1
    2       2       1
    3       0       1
    4       1       1
    5       1       1
    6       2       1
    7       0       1
    8       1       1
    9       2       1
    """)
    ts = msprime.load_text(
        nodes=node_table,
        edgesets=edgeset_table,
        sites=site_table,
        mutations=mutation_table)

    # Generic consistency checks first.
    self.check_pairwise_diversity(ts)
    self.check_pairwise_diversity_mutations(ts)
    self.check_Y_stat(ts)
    self.check_vectorization(ts)

    # diversity between 0 and 1
    pair_sets = [[0], [1]]

    def separates_pair(x):
        return float((x[0] > 0) != (x[1] > 0))

    # branch lengths:
    self.assertAlmostEqual(
        branch_length_diversity(ts, [0], [1]), true_diversity_01)
    self.assertAlmostEqual(
        ts.branch_stats(pair_sets, separates_pair), true_diversity_01)
    self.assertAlmostEqual(
        branch_stats_node_iter(ts, pair_sets, separates_pair),
        true_diversity_01)

    # mean diversity between [0, 1] and [0, 2]:
    mean_sets = [[0, 1], [0, 2]]
    set_sizes = [len(members) for members in mean_sets]

    def mean_diversity_weight(x):
        return float(x[0] * (set_sizes[1] - x[1]) +
                     (set_sizes[0] - x[0]) * x[1]) / 4.0

    # branch lengths:
    self.assertAlmostEqual(
        branch_length_diversity(ts, mean_sets[0], mean_sets[1]),
        true_mean_diversity)
    self.assertAlmostEqual(
        ts.branch_stats(mean_sets, mean_diversity_weight),
        true_mean_diversity)
    self.assertAlmostEqual(
        branch_stats_node_iter(ts, mean_sets, mean_diversity_weight),
        true_mean_diversity)

    # Y-statistic for (0/12)
    y_sets = [[0], [1, 2]]

    def y_weight(x):
        return ((x[0] == 1) and (x[1] == 0)) or ((x[0] == 0) and (x[1] == 2))

    # branch lengths:
    self.assertAlmostEqual(branch_length_Y(ts, 0, 1, 2), true_Y)
    self.assertAlmostEqual(ts.branch_stats(y_sets, y_weight), true_Y)
    self.assertAlmostEqual(
        branch_stats_node_iter(ts, y_sets, y_weight), true_Y)