def record_edge(self, left, right, parent, child):
    """
    Adds an edge to the output list.

    Edges are buffered per child in ``self.edge_buffer``, a mapping from
    child to the list of edges recorded for it so far. If the new edge
    starts exactly where the most recently buffered edge for this child
    ends *and* has the same parent, that edge is extended in place instead
    of a new edge being appended.
    """
    child_edges = self.edge_buffer.setdefault(child, [])
    if child_edges:
        last = child_edges[-1]
        # Only squash when the parent is unchanged: extending the previous
        # edge across a change of parent would silently drop the switch and
        # attribute the whole span to the earlier parent.
        if last.right == left and last.parent == parent:
            last.right = right
            return
    child_edges.append(msprime.Edge(left, right, parent, child))
def test_partial_overlap(self):
    """
    Builds a 7-site, 3-sample data set with five hand-written ancestors,
    two of which (IDs 2 and 3) cover adjacent partial intervals, and checks
    the resulting edges against a fixed expected list.
    """
    num_sites = 7
    sample_data = tsinfer.SampleData.initialise(3, num_sites)
    for site in range(num_sites):
        sample_data.add_variant(site, ["0", "1"], [0, 1, 1])
    sample_data.finalise()

    # One row per ancestor, in the order they are added:
    # (start, end, focal_sites, time, haplotype)
    # NOTE(review): ID 2's haplotype holds 0 rather than -1 outside
    # [start, end); kept exactly as in the original test data.
    ancestor_rows = [
        (0, 7, [], 5, [0, 0, 0, 0, 0, 0, 0]),  # ID 0
        (0, 7, [], 4, [0, 0, 0, 0, 0, 0, 0]),  # ID 1
        (0, 3, [2], 3, [0, 0, 1, 0, 0, 0, 0]),  # ID 2
        (3, 7, [4, 6], 2, [-1, -1, -1, 0, 1, 0, 1]),  # ID 3
        (0, 7, [0, 1, 3, 5], 1, [1, 1, 1, 1, 1, 1, 1]),  # ID 4
    ]
    ancestor_data = tsinfer.AncestorData.initialise(sample_data)
    for start, end, focal_sites, time, haplotype in ancestor_rows:
        ancestor_data.add_ancestor(
            start=start, end=end, focal_sites=focal_sites, time=time,
            haplotype=haplotype)
    ancestor_data.finalise()

    # (left, right, parent, child)
    edge_rows = [
        (0, 7, 0, 1),
        (0, 3, 2, 4),
        (3, 7, 3, 4),
        (3, 7, 1, 3),
        (0, 3, 1, 2),
    ]
    expected_edges = [msprime.Edge(*row) for row in edge_rows]
    self.verify_edges(sample_data, ancestor_data, expected_edges)
def edge_diffs(self):
    """
    Generator over the edge differences between consecutive intervals.

    Yields ``((left, right), edges_out, edges_in)`` tuples, where
    ``edges_out`` lists the edges whose right coordinate equals ``left``
    and ``edges_in`` lists the edges whose left coordinate equals ``left``.
    Iteration continues until every edge has been inserted and ``left``
    has reached the sequence length.
    """
    ts = self._tree_sequence
    num_edges = ts.get_num_edges()
    sequence_length = ts.get_sequence_length()
    edges = [msprime.Edge(*ts.get_edge(index)) for index in range(num_edges)]
    # Field 1 of the parent's node record, one entry per edge.
    parent_time = [ts.get_node(edge.parent)[1] for edge in edges]

    def insertion_key(index):
        edge = edges[index]
        return (edge.left, parent_time[index], edge.parent, edge.child)

    def removal_key(index):
        edge = edges[index]
        return (edge.right, -parent_time[index], -edge.parent, -edge.child)

    insertion_order = sorted(range(num_edges), key=insertion_key)
    removal_order = sorted(range(num_edges), key=removal_key)

    next_in = 0
    next_out = 0
    left = 0
    while next_in < num_edges or left < sequence_length:
        edges_out = []
        while (next_out < num_edges
                and edges[removal_order[next_out]].right == left):
            edges_out.append(edges[removal_order[next_out]])
            next_out += 1

        edges_in = []
        while (next_in < num_edges
                and edges[insertion_order[next_in]].left == left):
            edges_in.append(edges[insertion_order[next_in]])
            next_in += 1

        # The interval ends at the nearest pending breakpoint, or at the
        # end of the sequence when nothing is pending.
        right = sequence_length
        if next_in < num_edges:
            right = min(right, edges[insertion_order[next_in]].left)
        if next_out < num_edges:
            right = min(right, edges[removal_order[next_out]].right)
        yield (left, right), edges_out, edges_in
        left = right
def test_edge_overlap_bug(self):
    """
    Builds a 12-site, 3-sample data set with eight hand-written ancestors
    and checks the resulting edges against a fixed expected list. In the
    expected output child 7 has three adjacent edges whose parents
    alternate (1, 5, 1).
    """
    num_sites = 12
    sample_data = tsinfer.SampleData.initialise(3, num_sites)
    for site in range(num_sites):
        sample_data.add_variant(site, ["0", "1"], [0, 1, 1])
    sample_data.finalise()

    # One row per ancestor, in the order they are added:
    # (start, end, focal_sites, time, haplotype)
    ancestor_rows = [
        (0, 12, [], 8,
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),  # ID 0
        (0, 12, [], 7,
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),  # ID 1
        (0, 4, [], 6,
            [0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1]),  # ID 2
        (4, 12, [], 5,
            [-1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0]),  # ID 3
        (8, 12, [9, 11], 4,
            [-1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 0, 1]),  # ID 4
        (4, 8, [5, 7], 3,
            [-1, -1, -1, -1, 0, 1, 0, 1, -1, -1, -1, -1]),  # ID 5
        (0, 4, [1, 3], 2,
            [0, 1, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1]),  # ID 6
        (0, 12, [0, 2, 4, 6, 8, 10], 1,
            [1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0]),  # ID 7
    ]
    ancestor_data = tsinfer.AncestorData.initialise(sample_data)
    for start, end, focal_sites, time, haplotype in ancestor_rows:
        ancestor_data.add_ancestor(
            start=start, end=end, focal_sites=focal_sites, time=time,
            haplotype=haplotype)
    ancestor_data.finalise()

    # (left, right, parent, child)
    edge_rows = [
        (0, 12, 0, 1),
        (0, 4, 1, 2),
        (4, 12, 1, 3),
        (8, 12, 1, 4),
        (4, 8, 1, 5),
        (0, 4, 1, 6),
        (0, 4, 1, 7),
        (4, 8, 5, 7),
        (8, 12, 1, 7),
    ]
    expected_edges = [msprime.Edge(*row) for row in edge_rows]
    self.verify_edges(sample_data, ancestor_data, expected_edges)
def record_edge(self, left, right, parent, child):
    """
    Appends a new edge covering [left, right) from ``parent`` to ``child``
    to the end of ``self.edge_buffer``.
    """
    edge = msprime.Edge(left=left, right=right, parent=parent, child=child)
    self.edge_buffer.append(edge)
def trees(self):
    """
    Generator yielding the tree for each interval along the sequence.

    A single PythonSparseTree is created and then updated in place: for
    each interval the edges ending at its left coordinate are removed, the
    edges starting there are inserted, and the root list (a sibling-linked
    chain starting at ``st.left_root``) is kept in step with which nodes
    are above a sample. The *same* tree object is yielded every time, so
    callers that need to keep a tree must copy it before advancing.
    """
    M = self._tree_sequence.get_num_edges()
    sequence_length = self._tree_sequence.get_sequence_length()
    edges = [
        msprime.Edge(*self._tree_sequence.get_edge(j)) for j in range(M)
    ]
    # Field 1 of every node record, indexed by node ID
    # (presumably the node time -- confirm against get_node).
    t = [
        self._tree_sequence.get_node(j)[1]
        for j in range(self._tree_sequence.get_num_nodes())
    ]
    # Edge indexes in the order they are inserted (by left coordinate) and
    # removed (by right coordinate); ties are broken on t[parent], parent
    # and child, negated for removal.
    in_order = sorted(range(M), key=lambda j: (edges[j].left, t[edges[j].parent], edges[j].parent, edges[j].child))
    out_order = sorted(range(M), key=lambda j: (edges[j].right, -t[edges[j].parent], -edges[j].parent, -edges[j].child))
    j = 0  # Next position in in_order.
    k = 0  # Next position in out_order.
    N = self._tree_sequence.get_num_nodes()
    st = PythonSparseTree(N)

    # Before any edge is inserted every sample is its own root: chain the
    # samples together as siblings and flag them.
    samples = list(self._tree_sequence.get_samples())
    for l in range(len(samples)):
        if l < len(samples) - 1:
            st.right_sib[samples[l]] = samples[l + 1]
        if l > 0:
            st.left_sib[samples[l]] = samples[l - 1]
        st.above_sample[samples[l]] = True
        st.is_sample[samples[l]] = True

    st.left_root = msprime.NULL_NODE
    if len(samples) > 0:
        st.left_root = samples[0]

    # Consistency check on the initial root list: walking right from
    # left_root, every node's right sibling must point back at it.
    # `roots` is only collected here and is not used afterwards.
    # NOTE(review): compares against the literal -1 rather than
    # msprime.NULL_NODE -- presumably the same value; confirm.
    u = st.left_root
    roots = []
    while u != -1:
        roots.append(u)
        v = st.right_sib[u]
        if v != -1:
            assert st.left_sib[v] == u
        u = v

    st.left = 0
    while j < M or st.left < sequence_length:
        # Remove every edge that ends at the current left coordinate.
        while k < M and edges[out_order[k]].right == st.left:
            p = edges[out_order[k]].parent
            c = edges[out_order[k]].child
            k += 1

            # Unlink c from p's child list, patching p's left/right child
            # pointers when c was at either end.
            lsib = st.left_sib[c]
            rsib = st.right_sib[c]
            if lsib == msprime.NULL_NODE:
                st.left_child[p] = rsib
            else:
                st.right_sib[lsib] = rsib
            if rsib == msprime.NULL_NODE:
                st.right_child[p] = lsib
            else:
                st.left_sib[rsib] = lsib
            st.parent[c] = msprime.NULL_NODE
            st.left_sib[c] = msprime.NULL_NODE
            st.right_sib[c] = msprime.NULL_NODE

            # If c is not above a sample then we have nothing to do as we
            # cannot affect the status of any roots.
            if st.above_sample[c]:
                # Compute the new above sample status for the nodes from
                # p up to root.
                # The walk stops early at the first ancestor that is still
                # above a sample; `root` is the last node visited.
                v = p
                above_sample = False
                while v != msprime.NULL_NODE and not above_sample:
                    above_sample = st.is_sample[v]
                    u = st.left_child[v]
                    while u != msprime.NULL_NODE:
                        above_sample = above_sample or st.above_sample[u]
                        u = st.right_sib[u]
                    st.above_sample[v] = above_sample
                    root = v
                    v = st.parent[v]

                if not above_sample:
                    # root is no longer above samples. Remove it from the root list.
                    # left_root is moved onto a surviving neighbour (the
                    # right one wins if both exist); it is rewound to the
                    # leftmost root before the tree is yielded.
                    lroot = st.left_sib[root]
                    rroot = st.right_sib[root]
                    st.left_root = msprime.NULL_NODE
                    if lroot != msprime.NULL_NODE:
                        st.right_sib[lroot] = rroot
                        st.left_root = lroot
                    if rroot != msprime.NULL_NODE:
                        st.left_sib[rroot] = lroot
                        st.left_root = rroot
                    st.left_sib[root] = msprime.NULL_NODE
                    st.right_sib[root] = msprime.NULL_NODE

                # Add c to the root list.
                # c is spliced in immediately to the left of the current
                # st.left_root and then becomes st.left_root itself.
                # print("Insert ", c, "into root list")
                if st.left_root != msprime.NULL_NODE:
                    lroot = st.left_sib[st.left_root]
                    if lroot != msprime.NULL_NODE:
                        st.right_sib[lroot] = c
                    st.left_sib[c] = lroot
                    st.left_sib[st.left_root] = c
                st.right_sib[c] = st.left_root
                st.left_root = c

        # Insert every edge that starts at the current left coordinate.
        while j < M and edges[in_order[j]].left == st.left:
            p = edges[in_order[j]].parent
            c = edges[in_order[j]].child
            j += 1

            # print("insert ", c, "->", p)
            st.parent[c] = p
            u = st.right_child[p]
            # Remember c's sibling links before they are overwritten; they
            # are needed below to splice the root list.
            lsib = st.left_sib[c]
            rsib = st.right_sib[c]
            # Append c as the rightmost child of p.
            if u == msprime.NULL_NODE:
                st.left_child[p] = c
                st.left_sib[c] = msprime.NULL_NODE
                st.right_sib[c] = msprime.NULL_NODE
            else:
                st.right_sib[u] = c
                st.left_sib[c] = u
                st.right_sib[c] = msprime.NULL_NODE

            st.right_child[p] = c

            if st.above_sample[c]:
                # Propagate above_sample upwards from p, stopping after the
                # first ancestor that was already above a sample. After the
                # loop `above_sample` holds the *previous* status of `root`,
                # the last node visited.
                v = p
                above_sample = False
                while v != msprime.NULL_NODE and not above_sample:
                    above_sample = st.above_sample[v]
                    st.above_sample[v] = st.above_sample[v] or st.above_sample[c]
                    root = v
                    v = st.parent[v]
                # print("root = ", root, st.above_sample[root])

                if not above_sample:
                    # Replace c with root in root list.
                    # No ancestor was above a sample before, so `root`
                    # takes over c's old position (lsib/rsib).
                    # print("replacing", root, "with ", c ," in root list")
                    if lsib != msprime.NULL_NODE:
                        st.right_sib[lsib] = root
                    if rsib != msprime.NULL_NODE:
                        st.left_sib[rsib] = root
                    st.left_sib[root] = lsib
                    st.right_sib[root] = rsib
                    st.left_root = root
                else:
                    # Remove c from root list.
                    # c joined a subtree that was already above a sample,
                    # so its old neighbours are linked to each other.
                    # print("remove ", c ," from root list")
                    st.left_root = msprime.NULL_NODE
                    if lsib != msprime.NULL_NODE:
                        st.right_sib[lsib] = rsib
                        st.left_root = lsib
                    if rsib != msprime.NULL_NODE:
                        st.left_sib[rsib] = lsib
                        st.left_root = rsib

        # The interval ends at the nearest pending insertion or removal,
        # or at the end of the sequence if none is pending.
        st.right = sequence_length
        if j < M:
            st.right = min(st.right, edges[in_order[j]].left)
        if k < M:
            st.right = min(st.right, edges[out_order[k]].right)
        assert st.left_root != msprime.NULL_NODE
        # The updates above may leave left_root anywhere in the root list;
        # rewind it to the leftmost root.
        while st.left_sib[st.left_root] != msprime.NULL_NODE:
            st.left_root = st.left_sib[st.left_root]
        st.index += 1
        # Add in all the sites
        st.site_list = [
            site for site in self._sites if st.left <= site.position < st.right
        ]
        yield st
        st.left = st.right
def store_edge(self, left, right, parent, child):
    """
    Stores the specified edge by appending it to ``self.edge_buffer``.
    """
    new_edge = msprime.Edge(
        left=left, right=right, parent=parent, child=child)
    self.edge_buffer.append(new_edge)