Beispiel #1
0
def run_verify(args):
    """Check a tree sequence against a sample-data file from the CLI.

    Reads ``args.samples`` (loaded with ``tsinfer.SampleData.load``),
    ``args.tree_sequence`` (loaded with ``tskit.load``) and
    ``args.progress`` (whether the progress monitor is enabled), hands
    both loaded objects to ``tsinfer.verify`` and finally calls
    ``summarise_usage()``.
    """
    setup_logging(args)
    sample_data = tsinfer.SampleData.load(args.samples)
    tree_sequence = tskit.load(args.tree_sequence)
    tsinfer.verify(
        sample_data,
        tree_sequence,
        progress_monitor=ProgressMonitor(enabled=args.progress, verify=True),
    )
    summarise_usage()
Beispiel #2
0
 def verify_from_source(self, remove_leaves):
     """Build an ancestors tree sequence from a simulation and verify matching.

     A tree sequence is simulated with msprime (fixed seed), converted to
     sample data, and turned into an ancestors tree sequence via
     ``tsinfer.make_ancestors_ts``. The samples are then matched against
     it once per engine, and every resulting tree sequence is verified
     against the sample data.

     :param remove_leaves: Forwarded unchanged as the ``remove_leaves``
         keyword of ``tsinfer.make_ancestors_ts``.
     """
     ts = msprime.simulate(15, recombination_rate=1, mutation_rate=2, random_seed=3)
     samples = tsinfer.SampleData.from_tree_sequence(ts)
     ancestors_ts = tsinfer.make_ancestors_ts(
         samples, ts, remove_leaves=remove_leaves)
     tsinfer.check_ancestors_ts(ancestors_ts)
     for engine in [tsinfer.PY_ENGINE, tsinfer.C_ENGINE]:
         final_ts = tsinfer.match_samples(samples, ancestors_ts, engine=engine)
         # Verify inside the loop: checking only after the loop would leave
         # the first engine's output overwritten and never validated.
         tsinfer.verify(samples, final_ts)
Beispiel #3
0
def tsinfer_dev(
    n,
    L,
    seed,
    num_threads=1,
    recombination_rate=1e-8,
    error_rate=0,
    engine="C",
    log_level="WARNING",
    precision=None,
    debug=True,
    progress=False,
    path_compression=True,
):
    """Simulate data with msprime, run the tsinfer pipeline, and verify it.

    Steps: seed the numpy and stdlib RNGs, simulate a tree sequence,
    build sample data from it, generate ancestors, match ancestors, match
    samples (without simplification), print the edge count, and call
    ``tsinfer.verify`` on the result.

    :param n: First positional argument to ``msprime.simulate``.
    :param L: Scaled by ``10**6`` and truncated to an int to give the
        ``length`` passed to ``msprime.simulate``.
    :param seed: Seeds ``np.random``, ``random`` and the simulation.
    :param num_threads: Forwarded to ``tsinfer.generate_ancestors``.
    :param recombination_rate: Used both for the simulation and as the
        ``recombination_rate`` of the two matching steps.
    :param engine: Forwarded to every tsinfer call that takes ``engine``.
    :param precision: Forwarded to both matching steps.
    :param debug: When true, print the number of simulated sites.

    NOTE: ``error_rate``, ``log_level``, ``progress`` and
    ``path_compression`` are accepted but not read by this function; path
    compression is hard-coded (True for ancestors, False for samples).
    """
    np.random.seed(seed)
    random.seed(seed)

    sequence_length = int(L * 10**6)
    simulated_ts = msprime.simulate(
        n,
        Ne=10**4,
        length=sequence_length,
        recombination_rate=recombination_rate,
        mutation_rate=1e-8,
        random_seed=seed,
    )
    if debug:
        print("num_sites = ", simulated_ts.num_sites)
    assert simulated_ts.num_sites > 0

    samples = tsinfer.SampleData.from_tree_sequence(simulated_ts)

    # Keyword arguments shared by both matching passes. The matching
    # mutation rate is a fixed 1e-3, independent of the 1e-8 used to
    # simulate the data.
    matching_options = dict(
        engine=engine,
        precision=precision,
        recombination_rate=recombination_rate,
        mutation_rate=1e-3,
    )

    ancestors = tsinfer.generate_ancestors(
        samples, engine=engine, num_threads=num_threads
    )
    ancestors_ts = tsinfer.match_ancestors(
        samples,
        ancestors,
        path_compression=True,
        extended_checks=False,
        **matching_options,
    )
    inferred_ts = tsinfer.match_samples(
        samples,
        ancestors_ts,
        path_compression=False,
        simplify=False,
        **matching_options,
    )

    print("num_edges = ", inferred_ts.num_edges)

    tsinfer.verify(samples, inferred_ts)
Beispiel #4
0
def tsinfer_dev(
    n,
    L,
    seed,
    num_threads=1,
    recombination_rate=1e-8,
    error_rate=0,
    engine="C",
    log_level="WARNING",
    debug=True,
    progress=False,
    path_compression=True,
):
    """Simulate data, infer with augmented ancestors, and verify the result.

    Steps: seed the numpy and stdlib RNGs, simulate a tree sequence with
    msprime, build sample data from it, generate and match ancestors,
    augment the ancestors tree sequence with samples ``[5, 6, 7]``, match
    the samples (with simplification) and call ``tsinfer.verify``.

    :param n: First positional argument to ``msprime.simulate``.
    :param L: Scaled by ``10**6`` and truncated to an int to give the
        ``length`` passed to ``msprime.simulate``.
    :param seed: Seeds ``np.random``, ``random`` and the simulation.
    :param num_threads: Forwarded to ``tsinfer.generate_ancestors``.
    :param recombination_rate: Recombination rate of the simulation.
    :param engine: Forwarded to every tsinfer call that takes ``engine``.
    :param debug: When true, print the number of simulated sites.

    NOTE: ``error_rate``, ``log_level``, ``progress`` and
    ``path_compression`` are accepted but not read by this function; path
    compression is hard-coded (True for ancestors, False for samples).
    """
    np.random.seed(seed)
    random.seed(seed)

    sequence_length = int(L * 10**6)
    simulated_ts = msprime.simulate(
        n,
        Ne=10**4,
        length=sequence_length,
        recombination_rate=recombination_rate,
        mutation_rate=1e-8,
        random_seed=seed,
    )
    if debug:
        print("num_sites = ", simulated_ts.num_sites)
    assert simulated_ts.num_sites > 0

    samples = tsinfer.SampleData.from_tree_sequence(simulated_ts)

    ancestors = tsinfer.generate_ancestors(
        samples, engine=engine, num_threads=num_threads
    )
    base_ancestors_ts = tsinfer.match_ancestors(
        samples,
        ancestors,
        engine=engine,
        path_compression=True,
        extended_checks=False,
    )
    augmented_ancestors_ts = tsinfer.augment_ancestors(
        samples, base_ancestors_ts, [5, 6, 7], engine=engine
    )
    inferred_ts = tsinfer.match_samples(
        samples,
        augmented_ancestors_ts,
        path_compression=False,
        engine=engine,
        simplify=True,
    )

    tsinfer.verify(samples, inferred_ts)