def process_pop(pop):
    data = []
    nodes = np.array(pop.tables.nodes, copy=False)
    # Get the metadata for ancient samples
    amd = np.array(pop.ancient_sample_metadata, copy=False)
    # Get the ancient sample time points
    times = nodes['time'][amd['nodes'][:, 0]]
    utimes = np.unique(times)
    fpos = []
    ftimes = []
    origins = []
    sites = np.array(pop.tables.sites, copy=False)
    muts = np.array(pop.tables.mutations, copy=False)
    # Collect fixations that arose no later than 200 generations
    # after generation 10N and fixed after generation 10N.
    for i, j in zip(pop.fixation_times, pop.fixations):
        if i > 10 * pop.N and j.g <= 10 * pop.N + 200:
            fpos.append(j.pos)
            ftimes.append((i, j.pos))
            origins.append((j.g, j.pos))
    # Sort fixation times and origin times by position so that
    # they line up with the sorted positions in fpos.
    ftimes = np.array([i[0] for i in sorted(ftimes, key=lambda x: x[1])])
    origins = np.array([i[0] for i in sorted(origins, key=lambda x: x[1])])
    fpos = np.array(sorted(fpos))
    for ut in utimes:
        mdidx = np.where(times == ut)[0]
        samples = amd['nodes'][mdidx].flatten()
        tables, idmap = fwdpy11.simplify_tables(pop.tables, samples)
        sites = np.array(tables.sites, copy=False)
        muts = np.array(tables.mutations, copy=False)
        tv = fwdpy11.TreeIterator(tables, idmap[samples],
                                  update_samples=True)
        for t in tv:
            l = t.left
            r = t.right
            # Fixations whose positions fall in the current tree
            f = np.where((fpos >= l) & (fpos < r))[0]
            for i in f:
                idx = np.where(sites['position'] == fpos[i])[0]
                if len(idx) > 0:
                    mut_idx = np.where(muts['site'] == idx)[0]
                    assert len(mut_idx) == 1, "Bad mutation table error"
                    sbelow = t.samples_below(muts['node'][mut_idx])
                    # Two nodes per diploid, hence the // 2
                    individuals = np.unique(sbelow // 2)
                    mg = amd['g'][mdidx[individuals]].mean()
                    gbar = amd['g'][mdidx].mean()
                    vg = amd['g'][mdidx].var()
                    mw = amd['w'][mdidx[individuals]].mean()
                    wbar = amd['w'][mdidx].mean()
                    esize = pop.mutations[muts['key'][mut_idx[0]]].s
                    data.append(
                        Datum(ut, fpos[i], origins[i], ftimes[i], esize,
                              len(sbelow), mw, mg, wbar, gbar, vg))
    return data
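
# For reference, a minimal sketch of the Datum record that process_pop
# fills in.  This is an assumption: the field names below are chosen to
# mirror the positional arguments above, and the real definition lives
# elsewhere in the repository.
from collections import namedtuple

Datum = namedtuple("Datum",
                   ["generation",     # ancient-sample time point (ut)
                    "position",       # position of the fixation
                    "origin",         # generation when the mutation arose
                    "fixation_time",  # generation when it fixed
                    "esize",          # effect size (s)
                    "count",          # number of sample nodes carrying it
                    "mean_w",         # mean fitness of carriers
                    "mean_g",         # mean genetic value of carriers
                    "wbar",           # mean fitness, whole time point
                    "gbar",           # mean genetic value, whole time point
                    "vg"])            # genetic variance, whole time point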
def __call__(self, pop):
    assert len(pop.tables.preserved_nodes) // 2 == \
        len(pop.ancient_sample_metadata)
    # Get the most recent ancient samples
    # and record their number.  We do this
    # by a "brute-force" approach.
    for t, n, m in pop.sample_timepoints(False):
        if t not in self.timepoint_seen:
            self.timepoint_seen[t] = 1
        else:
            self.timepoint_seen[t] += 1
        if t not in self.sample_timepoints:
            self.sample_timepoints.append(t)
            self.sample_sizes.append(len(n) // 2)
        # Simplify to each time point
        tables, idmap = fwdpy11.simplify_tables(pop.tables, n)
        for ni in n:
            assert idmap[ni] != fwdpy11.NULL_NODE
            assert tables.nodes[idmap[ni]].time == t
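
# A minimal sketch of the class that owns the __call__ above, assuming it
# tracks time points in a dict and two parallel lists.  The class name is
# hypothetical; the attributes are inferred from how __call__ uses them.
class TimepointRecorder(object):  # hypothetical name
    def __init__(self):
        # Number of times each ancient-sample time point has been visited
        self.timepoint_seen = {}
        # Unique time points, in order of first appearance
        self.sample_timepoints = []
        # Diploid sample size recorded at each time point
        self.sample_sizes = []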
def get_stats(tables, samples):
    tables_for_sample, idmap = fwdpy11.simplify_tables(tables, samples)
    stats = []
    dmi = fwdpy11.DataMatrixIterator(tables_for_sample, idmap[samples],
                                     WINDOWS, True, True)
    for (i, dm), l in zip(enumerate(dmi), WINDOW_LEFTS):
        locus = int(i * STEPSIZE / LOCUS_LENGTH)
        window = i % (LOCUS_LENGTH / STEPSIZE)
        window_in_paper = int(window * STEPSIZE)
        pos, data = merge_matrix(dm)
        vm = libsequence.VariantMatrix(data, pos)
        ac = vm.count_alleles()
        pi = libsequence.thetapi(ac)
        D = libsequence.tajd(ac)
        Hp = libsequence.hprime(ac, 0)
        nhaps = libsequence.number_of_haplotypes(vm)
        hdiv = libsequence.haplotype_diversity(vm)
        stats.append(Datum(l, locus, window, window_in_paper,
                           pi, D, Hp, nhaps, hdiv))
    return stats
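
# A hypothetical construction of the module-level constants that
# get_stats relies on, assuming a genome of NLOCI loci, each LOCUS_LENGTH
# units long, tiled by non-overlapping windows of width STEPSIZE.  The
# values shown are placeholders; only the relationships matter, and they
# are chosen to be consistent with the locus/window arithmetic above.
NLOCI = 10          # assumption
LOCUS_LENGTH = 11.  # assumption
STEPSIZE = 1.       # assumption

WINDOW_LEFTS = [locus * LOCUS_LENGTH + w * STEPSIZE
                for locus in range(NLOCI)
                for w in range(int(LOCUS_LENGTH / STEPSIZE))]
WINDOWS = [(left, left + STEPSIZE) for left in WINDOW_LEFTS]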
def count_frequencies(pop, num_neutral):
    N = pop.N
    fixations = []
    for i, j in zip(pop.fixation_times, pop.fixations):
        if i >= 10 * pop.N and j.g <= 10 * pop.N + 200:
            fixations.append((j.g, j.pos))
    if num_neutral > 0:
        # Add neutral fixations, if there are any.
        # NOTE: we have no idea about fixation times
        # for neutral mutations, as they were not simulated!
        for i, j in enumerate(pop.mcounts):
            if j == 2 * pop.N and pop.mutations[i].neutral is True:
                # Neutral mutations have an origin time
                # randomly assigned based on branch lengths.
                # It is just uniform along the branch.
                g = pop.mutations[i].g
                if g >= 10 * pop.N and g <= 10 * pop.N + 200:
                    fixations.append((g, pop.mutations[i].pos))
    data = []
    pop_data = []
    # Iterate over all sample nodes.
    # False means to exclude the "alive" individuals
    # at time pop.generation.
    for t, n, metadata in pop.sample_timepoints(False):
        if t % N == 0:
            print(t)
        tables, idmap = fwdpy11.simplify_tables(pop.tables, n)
        muts_visited = 0
        # Remap input node IDs to output node IDs so that,
        # in ws_samples below, we can recover the metadata row
        # for each leaf at this time point carrying a mutation.
        remap_nodes = []
        for i in range(len(metadata)):
            remap_nodes.append(idmap[metadata[i]['nodes']])
        remap_nodes = np.ravel(remap_nodes)
        # Get the distribution of diploid fitnesses for this generation
        ws = metadata['w']
        pop_data.append(PopFitness(t, ws.mean(), ws.var()))
        # NOTE: update_samples=True is required so that
        # samples_below() works in the inner loop.
        for tree in fwdpy11.TreeIterator(tables, idmap[n],
                                         update_samples=True):
            for m in tree.mutations():
                assert m.key < len(pop.mutations)
                muts_visited += 1
                mut_node = m.node
                # NOTE: infinite sites means
                # leaf counts are the frequencies
                dac = tree.leaf_counts(mut_node)
                pos = tables.sites[m.site].position
                assert pos == pop.mutations[m.key].pos
                assert pos >= tree.left and pos < tree.right
                origin = pop.mutations[m.key].g
                fixed = int((origin, pos) in fixations)
                # Get the ws for the samples carrying the mutation.
                m_samples = tree.samples_below(m.node)
                # There are two nodes per individual, hence the // 2
                ws_samples = [
                    metadata[np.where(remap_nodes == samp)[0][0] // 2]['w']
                    for samp in m_samples]
                w_m = np.mean(ws_samples)
                # Edge case: we skip mutations that fixed prior
                # to any of these time points.
                fixed_before = False
                if dac == 2 * pop.N and fixed == 0:
                    fixed_before = True
                if not fixed_before:
                    data.append(AlleleFreq(t, pos, origin, dac, fixed,
                                           int(m.neutral),
                                           pop.mutations[m.key].label,
                                           pop.mutations[m.key].s, w_m))
        assert muts_visited == len(tables.mutations)
    return data, pop_data
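
# A minimal sketch of the two record types returned by count_frequencies,
# plus a typical downstream use.  The field names are assumptions that
# mirror the positional arguments above; the real definitions are not
# part of this excerpt.
from collections import namedtuple

import pandas as pd

PopFitness = namedtuple("PopFitness", ["generation", "wbar", "vw"])
AlleleFreq = namedtuple("AlleleFreq",
                        ["generation", "position", "origin", "dac",
                         "fixed", "neutral", "label", "esize",
                         "w_carriers"])

# Named tuples convert directly into tidy data frames:
# data, pop_data = count_frequencies(pop, num_neutral)
# freqs = pd.DataFrame(data, columns=AlleleFreq._fields)
# fitness = pd.DataFrame(pop_data, columns=PopFitness._fields)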
def test_simplify_tables_numpy_array(self):
    tables, idmap = fwdpy11.simplify_tables(
        self.pop.tables, np.array([i for i in range(10)]))
    for i in range(10):
        self.assertTrue(idmap[i] != fwdpy11.NULL_NODE)
def test_simplify_tables(self):
    tables, idmap = fwdpy11.simplify_tables(self.pop.tables,
                                            [i for i in range(10)])
    for i in range(10):
        self.assertTrue(idmap[i] != fwdpy11.NULL_NODE)
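
# A hypothetical fixture for the two tests above: the only requirement
# they impose is that self.pop has evolved with tree-sequence recording,
# so that its tables contain nodes and edges to simplify.  Every
# parameter value here is an illustrative assumption, not the
# repository's actual test setup.
import unittest

import numpy as np
import fwdpy11


class TestSimplifyTables(unittest.TestCase):
    def setUp(self):
        # Neutral Wright-Fisher model: no mutation, no recombination
        self.pop = fwdpy11.DiploidPopulation(100, 1.0)
        pdict = {'gvalue': fwdpy11.Multiplicative(2.0),
                 'rates': (0.0, 0.0, 0.0),
                 'demography': np.full(100, 100, dtype=np.uint32)}
        params = fwdpy11.ModelParams(**pdict)
        rng = fwdpy11.GSLrng(42)
        # Evolve with tree sequences, simplifying every 10 generations
        fwdpy11.evolvets(rng, self.pop, params, 10)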