def run_verify(args):
    """Run ``tsinfer.verify`` on the files named by the parsed CLI arguments.

    Reads ``args.samples`` with ``tsinfer.SampleData.load`` and
    ``args.tree_sequence`` with ``tskit.load``, then hands both to
    ``tsinfer.verify`` together with a ``ProgressMonitor`` whose ``enabled``
    flag comes from ``args.progress``. Logging is configured first via
    ``setup_logging`` and ``summarise_usage`` is called last.

    :param args: Argument namespace providing ``samples``, ``tree_sequence``
        and ``progress`` (plus whatever ``setup_logging`` reads).
    """
    setup_logging(args)
    sample_data = tsinfer.SampleData.load(args.samples)
    tree_sequence = tskit.load(args.tree_sequence)
    tsinfer.verify(
        sample_data,
        tree_sequence,
        # Constructed after both inputs are loaded, as in the original order.
        progress_monitor=ProgressMonitor(enabled=args.progress, verify=True),
    )
    summarise_usage()
def verify_from_source(self, remove_leaves):
    """Build an ancestors tree sequence from a simulation and verify matching.

    Simulates a fixed tree sequence (15 samples, seed 3), converts it to
    sample data, derives an ancestors tree sequence from it with
    ``tsinfer.make_ancestors_ts`` and checks it with
    ``tsinfer.check_ancestors_ts``. The samples are then matched against
    those ancestors once per engine, and each result is passed to
    ``tsinfer.verify``.

    :param remove_leaves: Forwarded unchanged to ``tsinfer.make_ancestors_ts``.
    """
    source_ts = msprime.simulate(
        15, recombination_rate=1, mutation_rate=2, random_seed=3
    )
    sample_data = tsinfer.SampleData.from_tree_sequence(source_ts)
    ancestors_ts = tsinfer.make_ancestors_ts(
        sample_data, source_ts, remove_leaves=remove_leaves
    )
    tsinfer.check_ancestors_ts(ancestors_ts)
    # Exercise both engines against the same ancestors tree sequence.
    for engine in (tsinfer.PY_ENGINE, tsinfer.C_ENGINE):
        matched_ts = tsinfer.match_samples(sample_data, ancestors_ts, engine=engine)
        tsinfer.verify(sample_data, matched_ts)
def tsinfer_dev(
    n,
    L,
    seed,
    num_threads=1,
    recombination_rate=1e-8,
    error_rate=0,
    engine="C",
    log_level="WARNING",
    precision=None,
    debug=True,
    progress=False,
    path_compression=True,
):
    """Simulate data, run the tsinfer pipeline on it and verify the result.

    The steps are: seed the NumPy and stdlib random generators, simulate a
    tree sequence with ``msprime.simulate``, convert it to sample data,
    generate ancestors, match ancestors, match samples, print the edge
    count and finally call ``tsinfer.verify`` on the inferred tree sequence.

    :param n: Sample size passed to ``msprime.simulate``.
    :param L: Sequence length; multiplied by ``10**6`` and truncated to an
        int before being passed as ``length``.
    :param seed: Seed for ``np.random``, ``random`` and the simulation.
    :param num_threads: Forwarded to ``tsinfer.generate_ancestors``.
    :param recombination_rate: Used both for the simulation and for the
        two matching steps.
    :param engine: Forwarded to every tsinfer inference call.
    :param precision: Forwarded to both matching steps.
    :param debug: When true, print the number of simulated sites.
    :param error_rate: Accepted but not read by this function.
    :param log_level: Accepted but not read by this function.
    :param progress: Accepted but not read by this function.
    :param path_compression: Accepted but not read by this function; the
        matching calls below hard-code their own values.
    :raises AssertionError: If the simulation produced no sites.
    """
    np.random.seed(seed)
    random.seed(seed)

    sequence_length = int(L * 10**6)
    simulated = msprime.simulate(
        n,
        Ne=10**4,
        length=sequence_length,
        recombination_rate=recombination_rate,
        mutation_rate=1e-8,
        random_seed=seed,
    )
    if debug:
        print("num_sites = ", simulated.num_sites)
    assert simulated.num_sites > 0

    samples = tsinfer.SampleData.from_tree_sequence(simulated)

    # Keyword arguments common to both matching steps. Note the matching
    # mutation rate (1e-3) is deliberately distinct from the simulation's.
    matching_kwargs = {
        "recombination_rate": recombination_rate,
        "mutation_rate": 1e-3,
        "engine": engine,
        "precision": precision,
    }

    ancestor_data = tsinfer.generate_ancestors(
        samples, engine=engine, num_threads=num_threads
    )
    ancestors_ts = tsinfer.match_ancestors(
        samples,
        ancestor_data,
        path_compression=True,
        extended_checks=False,
        **matching_kwargs,
    )
    inferred = tsinfer.match_samples(
        samples,
        ancestors_ts,
        path_compression=False,
        simplify=False,
        **matching_kwargs,
    )
    # Printed unconditionally (not gated on ``debug``).
    print("num_edges = ", inferred.num_edges)

    tsinfer.verify(samples, inferred)
def tsinfer_dev(n, L, seed, num_threads=1, recombination_rate=1e-8,
                error_rate=0, engine="C", log_level="WARNING", debug=True,
                progress=False, path_compression=True):
    """Simulate data, infer with augmented ancestors and verify the result.

    The steps are: seed the NumPy and stdlib random generators, simulate a
    tree sequence with ``msprime.simulate``, convert it to sample data,
    generate and match ancestors, augment the ancestors tree sequence with
    the fixed index list ``[5, 6, 7]``, match samples (with ``simplify=True``)
    and call ``tsinfer.verify`` on the inferred tree sequence.

    :param n: Sample size passed to ``msprime.simulate``.
    :param L: Sequence length; multiplied by ``10**6`` and truncated to an
        int before being passed as ``length``.
    :param seed: Seed for ``np.random``, ``random`` and the simulation.
    :param num_threads: Forwarded to ``tsinfer.generate_ancestors``.
    :param recombination_rate: Recombination rate for the simulation only.
    :param engine: Forwarded to every tsinfer inference call.
    :param debug: When true, print the number of simulated sites.
    :param error_rate: Accepted but not read by this function.
    :param log_level: Accepted but not read by this function.
    :param progress: Accepted but not read by this function.
    :param path_compression: Accepted but not read by this function; the
        matching calls below hard-code their own values.
    :raises AssertionError: If the simulation produced no sites.
    """
    np.random.seed(seed)
    random.seed(seed)

    sequence_length = int(L * 10**6)
    simulated = msprime.simulate(
        n,
        Ne=10**4,
        length=sequence_length,
        recombination_rate=recombination_rate,
        mutation_rate=1e-8,
        random_seed=seed,
    )
    if debug:
        print("num_sites = ", simulated.num_sites)
    assert simulated.num_sites > 0

    samples = tsinfer.SampleData.from_tree_sequence(simulated)
    ancestor_data = tsinfer.generate_ancestors(
        samples, engine=engine, num_threads=num_threads
    )

    base_ancestors_ts = tsinfer.match_ancestors(
        samples,
        ancestor_data,
        engine=engine,
        path_compression=True,
        extended_checks=False,
    )
    # The list [5, 6, 7] is fixed here, so it does not depend on ``n``.
    augmented_ancestors_ts = tsinfer.augment_ancestors(
        samples, base_ancestors_ts, [5, 6, 7], engine=engine
    )

    inferred = tsinfer.match_samples(
        samples,
        augmented_ancestors_ts,
        path_compression=False,
        engine=engine,
        simplify=True,
    )
    tsinfer.verify(samples, inferred)