def verify_simple_model(
    self, n, seed=1, recombination_rate=None, length=None, recombination_map=None
):
    """
    Check that simulating from an initial tree sequence holding only the
    samples gives the same tables as simulating ``n`` samples directly.

    Both simulations use the same seed, recombination settings and
    ``self.model``. Populations, the population metadata schema and
    provenances are cleared on both sides before the comparison.
    """
    shared = dict(
        random_seed=seed,
        recombination_rate=recombination_rate,
        recombination_map=recombination_map,
        model=self.model,
    )
    direct_ts = msprime.simulate(n, length=length, **shared)

    # Initial state for the second run: one population and n sample nodes.
    initial = tskit.TableCollection(direct_ts.sequence_length)
    initial.populations.add_row()
    for _ in range(n):
        initial.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)
    resumed_ts = msprime.simulate(
        from_ts=initial.tree_sequence(), start_time=0, **shared
    )

    direct_tables = direct_ts.dump_tables()
    resumed_tables = resumed_ts.dump_tables()
    both = (direct_tables, resumed_tables)
    for tc in both:
        assert len(tc.populations)
    # TODO use updated tskit APIs for comparisons.
    for tc in both:
        tc.populations.clear()
        tc.populations.metadata_schema = ""
        tc.provenances.clear()
    assert direct_tables == resumed_tables
def brute_force_merge_and_simplify(pstate):
    """
    Build a simplified tree sequence from ``pstate`` the slow way.

    Node flags are set to 1 for both nodes (``n0`` and ``n1``) of every
    entry in ``pstate.parents``, and node times are flipped so that the
    largest stored time becomes 0. The stored edges and all buffered edges
    are copied into a fresh table collection, which is then sorted and
    simplified.

    :return: The tree sequence of the merged, simplified tables.
    """
    source = pstate.tables
    merged = tskit.TableCollection(source.sequence_length)

    node_flags = np.zeros(len(source.nodes), dtype=np.uint32)
    for parent in pstate.parents:
        for node_id in (parent.n0, parent.n1):
            node_flags[node_id] = 1

    stored_times = source.nodes.time
    merged.nodes.set_columns(
        flags=node_flags,
        time=-1.0 * (stored_times - stored_times.max()),
    )

    stored_edges = source.edges
    merged.edges.set_columns(
        left=stored_edges.left,
        right=stored_edges.right,
        parent=stored_edges.parent,
        child=stored_edges.child,
    )

    # Each buffer entry holds two lists of edge tuples; append them all.
    for buffered in pstate.buffered_edges:
        for edge in buffered[0] + buffered[1]:
            merged.edges.add_row(*edge)

    merged.sort()
    merged.simplify()
    return merged.tree_sequence()
def export(self):
    """
    Exports the edges to a tskit tree sequence.

    NOTE: the individuals themselves are sorted by birth order. The
    segments w/in an individual are/should be/maybe quite close to sorted.
    Thus, a full table sort is probably wasteful and we sort segments
    w/in individuals instead, which can be trivially parallelized across
    individuals.

    Nodes are written in reverse order of ``self.individuals``; an
    individual's node ID is therefore its position in that reversed order.
    Individuals whose ``is_alive`` is ``True`` are flagged as samples, and
    node time is ``self.time - ind.time``.

    :return: The tree sequence built from the node and edge tables.
    """
    tables = tskit.TableCollection(self.sequence_length)
    reversed_individuals = list(reversed(self.individuals))
    # Map each individual's index to its node ID (its position in the
    # reversed list), which also makes debugging easier.
    node_ids = {ind.index: j for j, ind in enumerate(reversed_individuals)}

    for ind in reversed_individuals:
        tables.nodes.add_row(
            flags=tskit.NODE_IS_SAMPLE if ind.is_alive is True else 0,
            time=self.time - ind.time,
        )

    for ind in reversed_individuals:
        # Sort this parent's segments by child time (descending), then
        # child node ID, then left coordinate, instead of doing a full
        # table sort afterwards.
        segments = sorted(
            ind.segments,
            key=lambda x: (-x.child.time, node_ids[x.child.index], x.left),
        )
        for seg in segments:
            tables.edges.add_row(
                left=seg.left,
                right=seg.right,
                parent=node_ids[ind.index],
                child=node_ids[seg.child.index],
            )
    return tables.tree_sequence()
def decompress_zarr(root):
    """
    Rebuild a tree sequence from the arrays stored under ``root``.

    The sequence length is read from ``root.attrs["sequence_length"]``.
    Edge, migration and site positions are stored as indexes into the
    ``coordinates`` array and are mapped back through it; all other columns
    are passed to the tables as stored.

    :return: The tree sequence built from the populated tables.
    """
    tables = tskit.TableCollection(root.attrs["sequence_length"])
    coordinates = root["coordinates"][:]

    def column(path):
        # Column passed through exactly as stored.
        return root[path]

    def position(path):
        # Column stored as indexes into the coordinates array.
        return coordinates[root[path]]

    tables.individuals.set_columns(
        flags=column("individuals/flags"),
        location=column("individuals/location"),
        location_offset=column("individuals/location_offset"),
        metadata=column("individuals/metadata"),
        metadata_offset=column("individuals/metadata_offset"),
    )
    tables.nodes.set_columns(
        flags=column("nodes/flags"),
        time=column("nodes/time"),
        population=column("nodes/population"),
        individual=column("nodes/individual"),
        metadata=column("nodes/metadata"),
        metadata_offset=column("nodes/metadata_offset"),
    )
    tables.edges.set_columns(
        left=position("edges/left"),
        right=position("edges/right"),
        parent=column("edges/parent"),
        child=column("edges/child"),
    )
    tables.migrations.set_columns(
        left=position("migrations/left"),
        right=position("migrations/right"),
        node=column("migrations/node"),
        source=column("migrations/source"),
        dest=column("migrations/dest"),
        time=column("migrations/time"),
    )
    tables.sites.set_columns(
        position=position("sites/position"),
        ancestral_state=column("sites/ancestral_state"),
        ancestral_state_offset=column("sites/ancestral_state_offset"),
        metadata=column("sites/metadata"),
        metadata_offset=column("sites/metadata_offset"),
    )
    tables.mutations.set_columns(
        site=column("mutations/site"),
        node=column("mutations/node"),
        parent=column("mutations/parent"),
        derived_state=column("mutations/derived_state"),
        derived_state_offset=column("mutations/derived_state_offset"),
        metadata=column("mutations/metadata"),
        metadata_offset=column("mutations/metadata_offset"),
    )
    tables.populations.set_columns(
        metadata=column("populations/metadata"),
        metadata_offset=column("populations/metadata_offset"),
    )
    tables.provenances.set_columns(
        timestamp=column("provenances/timestamp"),
        timestamp_offset=column("provenances/timestamp_offset"),
        record=column("provenances/record"),
        record_offset=column("provenances/record_offset"),
    )
    return tables.tree_sequence()
def test_missing_data_samples(self):
    """
    Variants restricted to different sample subsets of a two-sample tree
    sequence with no edges, one site and one mutation above node 0.
    """
    tables = tskit.TableCollection(1.0)
    for _ in range(2):
        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
    tables.sites.add_row(0.5, "A")
    tables.mutations.add_row(0, 0, "T")
    ts = tables.tree_sequence()

    def first_variant(samples):
        return list(ts.variants(samples=samples))[0]

    # If we have no samples we still get a list of variants.
    var = first_variant([])
    assert len(var.genotypes) == 0
    assert not var.has_missing_data
    assert var.alleles == ("A", "T")

    # If we have a single sample that's not missing, there's no
    # missing data.
    var = first_variant([0])
    assert len(var.genotypes) == 1
    assert var.genotypes[0] == 1
    assert not var.has_missing_data
    assert var.alleles == ("A", "T")

    # If we have a single sample that is missing, there is
    # missing data.
    var = first_variant([1])
    assert len(var.genotypes) == 1
    assert var.genotypes[0] == -1
    assert var.has_missing_data
    assert var.alleles == ("A", "T", None)
def test_no_edges_mutations(self):
    """Verify a two-sample tree sequence with no edges and one mutation."""
    tc = tskit.TableCollection(1)
    tc.nodes.add_row(flags=tskit.NODE_IS_SAMPLE)
    tc.nodes.add_row(flags=tskit.NODE_IS_SAMPLE)
    tc.sites.add_row(position=0, ancestral_state="A")
    tc.mutations.add_row(site=0, node=0, derived_state="T")
    ts = tc.tree_sequence()
    self.verify(ts)
def DecodeTree(self, A):
    """
    Take in the array produced by 'EncodeTreeSequence()' and apply the
    inverse operation to produce a TreeSequence() for testing.

    Each row of ``A`` becomes one node whose time is ``A[row, 0, 0]``; a
    node with time 0.0 gets flag 1. For every column, ``A[row, column, 1]``
    and ``A[row, column, 2]`` are combined with ``GlueInt8`` into the parent
    of an edge spanning ``[column, column + 1)``. Entries where either value
    is negative are treated as padding and skipped.
    """
    num_rows, num_columns = A.shape[0], A.shape[1]

    tables = tskit.TableCollection(sequence_length=num_columns)
    nodes = tables.nodes
    edges = tables.edges
    tables.populations.add_row()

    for child in range(num_rows):
        node_time = A[child, 0, 0]
        node_flag = 1 if node_time == 0.0 else 0
        nodes.add_row(flags=node_flag, time=float(node_time), population=0)

        for left in range(num_columns):
            top = A[child, left, 1]
            bot = A[child, left, 2]
            # for padding, we don't add edges
            if top < 0 or bot < 0:
                continue
            edges.add_row(
                left=left,
                right=left + 1,
                parent=GlueInt8(top, bot),
                child=child,
            )

    tables.sort()
    tables.simplify()
    return tables.tree_sequence()
def export(self):
    """
    Exports the edges to a tskit tree sequence.

    Every individual returned by ``self.all_reachable()`` becomes the node
    whose ID equals its ``index``; gaps in the index sequence are filled
    with padding nodes (flags 0, time 0). Individuals whose ``is_alive`` is
    ``True`` are flagged as samples, and node time is
    ``self.time - ind.time``.

    :return: The tree sequence built from the sorted tables.
    :raises ValueError: If the reachable individuals' indexes are not
        unique and non-negative. The node-ID mapping is impossible in that
        case, and the padding loop previously never terminated.
    """
    tables = tskit.TableCollection(self.sequence_length)
    # Map the individuals to their indexes to make debug easier.
    # THIS IS A TERRIBLE IDEA!!!
    sorted_individuals = sorted(self.all_reachable(), key=lambda x: x.index)
    next_ind = 0
    for ind in sorted_individuals:
        if ind.index < next_ind:
            raise ValueError(
                "Individual indexes must be unique and non-negative"
            )
        while next_ind < ind.index:
            # Add in a padding node.
            tables.nodes.add_row(flags=0, time=0)
            next_ind += 1
        ret = tables.nodes.add_row(
            flags=tskit.NODE_IS_SAMPLE if ind.is_alive is True else 0,
            time=self.time - ind.time,
        )
        assert ret == ind.index
        next_ind += 1

    for ind in sorted_individuals:
        for child, segments in ind.children.items():
            for seg in segments:
                tables.edges.add_row(
                    left=seg.left,
                    right=seg.right,
                    parent=ind.index,
                    child=child.index,
                )
    # Can't be bothered doing the sorting above to get rid of this,
    # but it's trivial.
    tables.sort()
    return tables.tree_sequence()
def test_equal_internal_node_time(self):
    """Round-tripping a tree with two equal-time internal nodes fails."""
    #     6
    #   ┏━┻━┓
    #   4   5
    #  ┏┻┓ ┏┻┓
    #  0 1 2 3
    tc = tskit.TableCollection(1)
    for _ in range(4):
        tc.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
    for internal_time in (1, 1, 2):
        tc.nodes.add_row(0, time=internal_time)
    parent_child = [(4, 0), (4, 1), (5, 2), (5, 3), (6, 4), (6, 5)]
    for parent, child in parent_child:
        tc.edges.add_row(0, 1, parent, child)
    tc.sort()
    ms_text = tsconvert.to_ms(tc.tree_sequence())
    # The current algorithm assumes node times are unique
    with pytest.raises(ValueError):
        tsconvert.from_ms(ms_text)
def caterpillar_tree(n, num_sites=0, num_mutations=1):
    """
    Returns a caterpillar tree with n samples. For each of the sites, a
    path of at most n - 2 mutations is put down along the internal nodes.
    Each site gets exactly the same set of mutations.

    Site ``j`` is placed at position ``(j + 1) / n`` with ancestral state
    "0"; mutations start at node ``2 * n - 3`` and move to the next lower
    node ID each time, alternating derived state between "1" and "0".

    :raises ValueError: If sites are requested and ``num_mutations`` is
        greater than ``n - 2``.
    """
    if num_sites > 0 and num_mutations > n - 2:
        raise ValueError("At most n - 2 mutations allowed")
    tables = tskit.TableCollection(1)
    for _ in range(n):
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)

    # Add the internal nodes: each one joins the previous subtree root
    # with the next sample.
    span = tables.sequence_length
    subtree_root = 0
    for sample in range(1, n):
        internal = tables.nodes.add_row(time=sample)
        for child in (subtree_root, sample):
            tables.edges.add_row(0, span, internal, child)
        subtree_root = internal

    first_mutation_node = 2 * n - 3
    for site in range(num_sites):
        tables.sites.add_row(position=(site + 1) / n, ancestral_state="0")
        for k in range(num_mutations):
            tables.mutations.add_row(
                site=site,
                derived_state=str((k + 1) % 2),
                node=first_mutation_node - k,
            )

    tables.sort()
    tables.build_index()
    tables.compute_mutation_parents()
    return tables.tree_sequence()
def test_multiple_mrcas(self):
    """
    Compare ARG and mutation log-likelihoods against hand-computed values
    for a two-sample ARG whose two halves, [0, 0.5) and [0.5, 1), have
    different roots.
    """
    tables = tskit.TableCollection(sequence_length=1)

    # Rows are grouped per table here; the order within each table is
    # what determines the row IDs.
    node_rows = [
        (tskit.NODE_IS_SAMPLE, 0),
        (tskit.NODE_IS_SAMPLE, 0),
        (msprime.NODE_IS_RE_EVENT, 0.1),
        (msprime.NODE_IS_RE_EVENT, 0.1),
        (msprime.NODE_IS_RE_EVENT, 0.15),
        (msprime.NODE_IS_RE_EVENT, 0.15),
        (0, 0.5),
        (0, 1),
    ]
    for node_flags, node_time in node_rows:
        tables.nodes.add_row(
            flags=node_flags, population=0, individual=-1, time=node_time
        )

    edge_rows = [
        (0, 0.5, 2, 1),
        (0.5, 1, 3, 1),
        (0, 0.5, 4, 0),
        (0.5, 1, 5, 0),
        (0, 0.5, 6, 2),
        (0, 0.5, 6, 4),
        (0.5, 1, 7, 3),
        (0.5, 1, 7, 5),
    ]
    for left, right, parent, child in edge_rows:
        tables.edges.add_row(left=left, right=right, parent=parent, child=child)

    for site_id, mutated_node in enumerate((1, 4, 3)):
        tables.mutations.add_row(site=site_id, node=mutated_node, derived_state="1")
    for site_position in (0.1, 0.2, 0.7):
        tables.sites.add_row(site_position, "0")
    tables.populations.add_row()
    arg = tables.tree_sequence()

    for r in np.arange(0.1, 10, 0.1):
        expected_arg_ll = math.log(r) - (1 + 2 * r) * 0.1
        expected_arg_ll += math.log(r) - (3 + 2 * r) * 0.05
        expected_arg_ll -= (6 + 2 * r) * 0.35
        expected_arg_ll -= (1 + r) * 0.5
        observed_arg_ll = msprime.log_arg_likelihood(arg, r)
        self.assertTrue(math.isclose(expected_arg_ll, observed_arg_ll))

    tree_length = 1.5
    for t in np.arange(0.1, 10, 0.1):
        expected_mut_ll = (3 * math.log(tree_length * t) - tree_length * t)
        expected_mut_ll -= math.log(tree_length)
        expected_mut_ll -= 2 * math.log(2 * tree_length)
        observed_mut_ll = msprime.unnormalised_log_mutation_likelihood(arg, t)
        self.assertTrue(math.isclose(expected_mut_ll, observed_mut_ll))
def test_zero_has_parent(self):
    """check_ancestors_ts rejects a tree sequence where node 0 has a parent."""
    tc = tskit.TableCollection(1)
    for node_time in (1, 2):
        tc.nodes.add_row(time=node_time, flags=0)
    tc.edges.add_row(left=0, right=1, parent=1, child=0)
    ts = tc.tree_sequence()
    with self.assertRaises(ValueError):
        tsinfer.check_ancestors_ts(ts)
def to_tsk_tree(self):
    """
    Convert this tree into a tskit tree on a sequence of length 1.

    ``self.num_leaves`` sample nodes at time 0 are created first; a leaf's
    ``label`` is used directly as its node ID. Each internal node is given
    a time one greater than that of its oldest child.

    :return: The first tree of the resulting tree sequence.
    """
    seq_length = 1
    tables = tskit.TableCollection(seq_length)
    for _ in range(self.num_leaves):
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)

    def insert(subtree):
        # Returns the node ID representing ``subtree``.
        if subtree.is_leaf():
            assert subtree.label is not None
            return subtree.label
        children = [insert(child) for child in subtree.children]
        # Arbitrarily set parent time +1 from their oldest child
        node_times = tables.nodes.time
        oldest = max(node_times[c] for c in children)
        parent = tables.nodes.add_row(time=oldest + 1)
        for c in children:
            tables.edges.add_row(0, seq_length, parent, c)
        return parent

    insert(self)
    # The way in which we're inserting nodes doesn't necessarily
    # adhere to the ordering constraint on edges, so we have
    # to sort.
    tables.sort()
    return tables.tree_sequence().first()
def test_fromdict_all_values_empty(self):
    """A reference sequence dict of empty values yields no reference sequence."""
    as_dict = tskit.TableCollection(1).asdict()
    as_dict["reference_sequence"] = {
        "data": "",
        "url": "",
        "metadata_schema": "",
        "metadata": b"",
    }
    rebuilt = tskit.TableCollection.fromdict(as_dict)
    assert not rebuilt.has_reference_sequence()
def test_missing_data(self):
    """
    Haplotypes for two isolated samples with one site, under the
    missing-data character, ``isolated_as_missing`` and the deprecated
    ``impute_missing_data`` options.
    """
    tables = tskit.TableCollection(1.0)
    for _ in range(2):
        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
    tables.sites.add_row(0.5, "A")
    ts = tables.tree_sequence()

    # The iterator is created first; the error is expected on consumption.
    clashing = ts.haplotypes(missing_data_character="A")
    with self.assertRaises(ValueError):
        list(clashing)

    for c in ("-", ".", "a"):
        self.assertEqual(list(ts.haplotypes(missing_data_character=c)), [c, c])

    missing = ["-", "-"]
    ancestral = ["A", "A"]
    cases = [
        ({"isolated_as_missing": True}, missing),
        ({"isolated_as_missing": False}, ancestral),
        ({}, missing),
        # Test deprecated method
        ({"impute_missing_data": True}, ancestral),
        ({"impute_missing_data": False}, missing),
        ({"isolated_as_missing": True, "impute_missing_data": True}, missing),
        ({"isolated_as_missing": True, "impute_missing_data": False}, missing),
        ({"isolated_as_missing": False, "impute_missing_data": True}, ancestral),
        ({"isolated_as_missing": False, "impute_missing_data": False}, ancestral),
    ]
    for kwargs, expected in cases:
        h = list(ts.haplotypes(**kwargs))
        self.assertEqual(h, expected)
def test_missing_data_samples(self):
    """
    Variants restricted to different sample subsets of a two-sample tree
    sequence with no edges, one site and one mutation above node 0.
    """
    tables = tskit.TableCollection(1.0)
    for _ in range(2):
        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
    tables.sites.add_row(0.5, "A")
    tables.mutations.add_row(0, 0, "T")
    ts = tables.tree_sequence()

    def first_variant(samples):
        return list(ts.variants(samples=samples))[0]

    # If we have no samples we still get a list of variants.
    var = first_variant([])
    self.assertEqual(len(var.genotypes), 0)
    self.assertFalse(var.has_missing_data)
    self.assertEqual(var.alleles, ("A", "T"))

    # If we have a single sample that's not missing, there's no
    # missing data.
    var = first_variant([0])
    self.assertEqual(len(var.genotypes), 1)
    self.assertEqual(var.genotypes[0], 1)
    self.assertFalse(var.has_missing_data)
    self.assertEqual(var.alleles, ("A", "T"))

    # If we have a single sample that is missing, there is
    # missing data.
    var = first_variant([1])
    self.assertEqual(len(var.genotypes), 1)
    self.assertEqual(var.genotypes[0], -1)
    self.assertTrue(var.has_missing_data)
    self.assertEqual(var.alleles, ("A", "T", None))
def test_two_populations_migration(self):
    """
    A two-population simulation with symmetric migration gives the same
    tables whether it is run directly or started from a tree sequence
    holding only the samples (provenances excluded).
    """
    n = 10
    seed = 1234
    shared = dict(migration_matrix=[[0, 1], [1, 0]], random_seed=seed)

    direct_ts = msprime.simulate(
        population_configurations=[
            msprime.PopulationConfiguration(n),
            msprime.PopulationConfiguration(0),
        ],
        **shared
    )

    # Initial state: two populations, all n samples in population 0.
    initial = tskit.TableCollection(1)
    for _ in range(2):
        initial.populations.add_row()
    for _ in range(n):
        initial.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)

    resumed_ts = msprime.simulate(
        from_ts=initial.tree_sequence(),
        start_time=0,
        population_configurations=[
            msprime.PopulationConfiguration(),
            msprime.PopulationConfiguration(),
        ],
        **shared
    )

    direct_tables = direct_ts.dump_tables()
    resumed_tables = resumed_ts.dump_tables()
    for tc in (direct_tables, resumed_tables):
        tc.provenances.clear()
    self.assertEqual(direct_tables, resumed_tables)
def verify_simple_model(
    self, n, seed=1, recombination_rate=None, length=None, recombination_map=None
):
    """
    Check that simulating from an initial tree sequence holding only the
    samples gives the same tables as simulating ``n`` samples directly.

    Both simulations use the same seed, recombination settings and
    ``self.model``. Provenances are cleared before the comparison.
    """
    shared = dict(
        random_seed=seed,
        recombination_rate=recombination_rate,
        recombination_map=recombination_map,
        model=self.model,
    )
    direct_ts = msprime.simulate(n, length=length, **shared)

    # Initial state for the second run: one population and n sample nodes.
    initial = tskit.TableCollection(direct_ts.sequence_length)
    initial.populations.add_row()
    for _ in range(n):
        initial.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)
    resumed_ts = msprime.simulate(
        from_ts=initial.tree_sequence(), start_time=0, **shared
    )

    direct_tables = direct_ts.dump_tables()
    resumed_tables = resumed_ts.dump_tables()
    for tc in (direct_tables, resumed_tables):
        tc.provenances.clear()
    self.assertEqual(direct_tables, resumed_tables)
def felsenstein_tables():
    """
    Return tables for the example tree.

    Nodes 0-4 are samples at time 0; nodes 5-8 are internal nodes at
    times 1-4. All edges span [0, 1).
    """
    #
    #       8
    #   ┏━━━┻━━┓
    #   ┃      7
    #   ┃     ┏┻┓
    #   6     ┃ ┃
    # ┏━┻┓    ┃ ┃
    # ┃  5    ┃ ┃
    # ┃ ┏┻┓   ┃ ┃
    # 2 3 4   0 1
    #
    tc = tskit.TableCollection(1)
    for _ in range(5):
        tc.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
    for internal_time in range(1, 5):
        tc.nodes.add_row(flags=0, time=internal_time)
    parent_child = [
        (7, 0),
        (7, 1),
        (6, 2),
        (5, 3),
        (5, 4),
        (6, 5),
        (8, 6),
        (8, 7),
    ]
    for parent, child in parent_child:
        tc.edges.add_row(0, 1, parent, child)
    tc.sort()
    return tc
def simulate(nsam: int):
    """
    The linear-time algorithm of Hudson, 1990, adapted to use tree sequences

    The citation for this algorithm is Hudson, Richard R. 1990.
    “Gene Genealogies and the Coalescent Process.”
    Oxford Surveys in Evolutionary Biology 7 (1): 44.

    Time is scaled in units of 2N generations. Random numbers are drawn
    from the global ``np.random`` state.

    :param nsam: The sample size
    :type nsam: int
    :return: A tree sequence on the interval [0, 1) with ``nsam`` sample
        nodes and one ancestor node per coalescent event.
    """
    tc = tskit.TableCollection(1)
    lineages = np.arange(2 * nsam - 1, dtype=np.int32)
    for _ in range(nsam):
        tc.nodes.add_row(time=0.0, flags=tskit.NODE_IS_SAMPLE)

    now = 0.0
    # n is the number of lineages remaining before each coalescence.
    for n in range(nsam, 1, -1):
        # Generate time to next coalescent event,
        # in units of 2N generations.
        rcoal = (n * (n - 1)) / 2.
        now += np.random.exponential(1. / rcoal)

        # Register a new ancestor node. It is not a sample, so its flag
        # is zero; its index follows from the number of events so far.
        tc.nodes.add_row(time=now, flags=0)
        ancestor = 2 * nsam - n

        # Perform the swap steps of the algorithm: draw the first child
        # and move the last active lineage into its slot, then draw the
        # second child and put the ancestor into its slot.
        pick = np.random.choice(n, 1)[0]
        c1 = lineages[pick]
        lineages[pick] = lineages[n - 1]
        pick = np.random.choice(n - 1, 1)[0]
        c2 = lineages[pick]
        lineages[pick] = lineages[ancestor]

        # Both children share the same parental node. An edge table
        # requires that child nodes be sorted in increasing order per
        # parent, so we enforce that here.
        first, second = (c2, c1) if c1 > c2 else (c1, c2)

        # Record the edges
        for child in (first, second):
            tc.edges.add_row(parent=ancestor, child=child, left=0.0, right=1.0)

    return tc.tree_sequence()
def _load_legacy_hdf5_v2(root, remove_duplicate_positions):
    """
    Load a version 2 legacy HDF5 file and return it as a tree sequence.

    Each coalescence record in ``root["trees"]`` is expanded into two edges,
    one per child. Nodes below the smallest record node ID are flagged as
    samples. Optional ``samples`` and ``mutations`` groups are applied when
    present.

    The provenance table of the result holds the legacy "generate_trees"
    record, the "generate_mutations" record if the file has mutations, and
    finally the upgrade record. The legacy records used to be collected in
    a detached table that was never attached to the result, so they were
    silently lost.

    :param root: The opened HDF5 root group.
    :param remove_duplicate_positions: Forwarded to
        ``_convert_hdf5_mutations``.
    :return: The tree sequence built from the sorted tables.
    """
    # Get the coalescence records
    trees_group = root["trees"]
    old_timestamp = datetime.datetime.min.isoformat()
    # Legacy provenance records, in order; written to the tables below.
    legacy_records = [_get_v2_provenance("generate_trees", trees_group.attrs)]

    # Every record has two children, so it yields two consecutive edges
    # that share the record's node and interval.
    num_rows = trees_group["node"].shape[0]
    index = np.arange(num_rows, dtype=int)
    parent = np.zeros(2 * num_rows, dtype=np.int32)
    parent[2 * index] = trees_group["node"]
    parent[2 * index + 1] = trees_group["node"]
    left = np.zeros(2 * num_rows, dtype=np.float64)
    left[2 * index] = trees_group["left"]
    left[2 * index + 1] = trees_group["left"]
    right = np.zeros(2 * num_rows, dtype=np.float64)
    right[2 * index] = trees_group["right"]
    right[2 * index + 1] = trees_group["right"]
    child = np.array(trees_group["children"], dtype=np.int32).flatten()

    # The sequence length is taken as the largest right coordinate.
    tables = tskit.TableCollection(np.max(right))
    tables.edges.set_columns(left=left, right=right, parent=parent, child=child)

    cr_node = np.array(trees_group["node"], dtype=np.int32)
    num_nodes = max(np.max(child), np.max(cr_node)) + 1
    # Nodes below the smallest record node ID are treated as the samples.
    sample_size = np.min(cr_node)
    flags = np.zeros(num_nodes, dtype=np.uint32)
    population = np.zeros(num_nodes, dtype=np.int32)
    time = np.zeros(num_nodes, dtype=np.float64)
    flags[:sample_size] = tskit.NODE_IS_SAMPLE
    cr_population = np.array(trees_group["population"], dtype=np.int32)
    cr_time = np.array(trees_group["time"])
    time[cr_node] = cr_time
    population[cr_node] = cr_population
    if "samples" in root:
        samples_group = root["samples"]
        population[:sample_size] = samples_group["population"]
        if "time" in samples_group:
            time[:sample_size] = samples_group["time"]
    tables.nodes.set_columns(flags=flags, population=population, time=time)
    _set_populations(tables)

    if "mutations" in root:
        mutations_group = root["mutations"]
        _convert_hdf5_mutations(
            mutations_group,
            tables.sites,
            tables.mutations,
            remove_duplicate_positions,
        )
        legacy_records.append(
            _get_v2_provenance("generate_mutations", mutations_group.attrs)
        )

    for record in legacy_records:
        tables.provenances.add_row(timestamp=old_timestamp, record=record)
    tables.provenances.add_row(_get_upgrade_provenance(root))
    tables.sort()
    return tables.tree_sequence()
def tree(self):
    """
    Return a tree that is a single chain: sample node 0 at time 0 below
    nodes 1, 2 and 3 at times 1, 2 and 3.
    """
    tc = tskit.TableCollection(1.0)
    tc.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
    for parent in range(1, 4):
        tc.nodes.add_row(flags=0, time=parent)
        tc.edges.add_row(left=0, right=1, parent=parent, child=parent - 1)
    tc.sort()
    ts = tc.tree_sequence()
    return ts.first()
def test_asdict_reference_no_metadata(self):
    """With only data set, asdict() omits the metadata keys."""
    tc = tskit.TableCollection(1)
    tc.reference_sequence.data = "ABCDEF"
    refseq_dict = tc.asdict()["reference_sequence"]
    assert refseq_dict["data"] == "ABCDEF"
    assert refseq_dict["url"] == ""
    for absent_key in ("metadata", "metadata_schema"):
        assert absent_key not in refseq_dict
def test_same_object(self):
    """
    A previously fetched reference sequence sees later writes, although
    each attribute access returns a distinct Python object.
    """
    tc = tskit.TableCollection(1)
    earlier_handle = tc.reference_sequence
    tc.reference_sequence.data = "asdf"
    assert earlier_handle.data == "asdf"
    # Not clear we want to do this, but keeping the same pattern as the
    # tables for now.
    later_handle = tc.reference_sequence
    assert later_handle is not earlier_handle
def test_write_metadata_schema_fails(self):
    """Setting the schema on a tree sequence's reference sequence fails."""
    tc = tskit.TableCollection(1)
    tc.reference_sequence.data = "abc"
    ts = tc.tree_sequence()
    new_schema = tskit.MetadataSchema.permissive_json()
    with pytest.raises(AttributeError, match="read-only"):
        ts.reference_sequence.metadata_schema = new_schema
def test_write_metadata_fails(self):
    """Setting metadata on a tree sequence's reference sequence fails."""
    tc = tskit.TableCollection(1)
    tc.reference_sequence.data = "abc"
    ts = tc.tree_sequence()
    new_metadata = b"xyz"
    with pytest.raises(AttributeError, match="read-only"):
        # NOTE: it can be slightly confusing here because we try to encode
        # first, and so we don't get an AttributeError for all inputs.
        ts.reference_sequence.metadata = new_metadata
def test_zero_has_no_children(self):
    """check_ancestors_ts rejects a tree sequence where node 0 has no children."""
    tc = tskit.TableCollection(1)
    for node_time in (1, 2, 3):
        tc.nodes.add_row(time=node_time, flags=0)
    tc.edges.add_row(left=0, right=1, parent=2, child=1)
    ts = tc.tree_sequence()
    with pytest.raises(ValueError):
        tsinfer.check_ancestors_ts(ts)
def test_mutation_parent_example(self):
    """
    Verify a tree sequence with two mutations on the same node at one site,
    where the second mutation names the first as its parent.
    """
    tc = tskit.TableCollection(1)
    for _ in range(2):
        tc.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
    tc.sites.add_row(position=0, ancestral_state="A")
    first_mutation = dict(site=0, node=0, derived_state="T")
    back_mutation = dict(site=0, node=0, parent=0, derived_state="A")
    tc.mutations.add_row(**first_mutation)
    tc.mutations.add_row(**back_mutation)
    self.verify(tc.tree_sequence())
def test_fromdict_reference_data(self):
    """A reference sequence dict with only data leaves the other fields empty."""
    as_dict = tskit.TableCollection(1).asdict()
    as_dict["reference_sequence"] = dict(data="XYZ")
    rebuilt = tskit.TableCollection.fromdict(as_dict)
    assert rebuilt.has_reference_sequence()
    refseq = rebuilt.reference_sequence
    assert refseq.data == "XYZ"
    assert refseq.url == ""
    assert repr(refseq.metadata_schema) == ""
    assert refseq.metadata == b""
def test_fromdict_reference_url(self):
    """A reference sequence dict with only a URL leaves the other fields empty."""
    as_dict = tskit.TableCollection(1).asdict()
    as_dict["reference_sequence"] = dict(url="file://file.fasta")
    rebuilt = tskit.TableCollection.fromdict(as_dict)
    assert rebuilt.has_reference_sequence()
    refseq = rebuilt.reference_sequence
    assert refseq.data == ""
    assert refseq.url == "file://file.fasta"
    assert repr(refseq.metadata_schema) == ""
    assert refseq.metadata == b""