Esempio n. 1
0
def resolve_polytomies(ts, polytomy_func):
    """
    Rebuild a tree sequence, routing each parent's edge records through
    ``polytomy_func``.

    The edgesets of ``ts`` are walked in order and grouped by parent; within a
    single parent they are further split into blocks of contiguous records.
    Whenever the parent changes (and once more after the loop) the accumulated
    group is handed to ``polytomy_func``.

    :param ts: The tree sequence to process. Its edgesets are assumed to be
        ordered so that all records sharing a parent are adjacent.
    :param polytomy_func: Callable invoked as
        ``polytomy_func(edge_records, new_edgesets, nodes)``. ``edge_records``
        is a list of lists of edgeset records for one parent (one inner list
        per contiguous block); ``new_edgesets`` and ``nodes`` are the tables
        to be added to.
    :return: The result of ``msprime.load_tables`` on the node table, the new
        edgeset table and the original mutations.
    """
    new_edgesets = msprime.EdgesetTable()
    nodes, mutations = get_nodes_and_mutations(ts)
    # Edge records for the current parent, split into contiguous blocks.
    # Starts empty so that an input without edgesets never reaches
    # polytomy_func.
    edge_records = []
    for e in ts.edgesets():    # assume records are in order
        if not edge_records:
            # Very first record: it opens the first block.
            edge_records = [[e]]
        elif e.parent == edge_records[0][0].parent:
            last = edge_records[-1][-1]
            # Two records are contiguous when they share a breakpoint; both
            # directions are accepted so the check does not depend on whether
            # records are sorted by ascending or descending position.
            if e.left == last.right or e.right == last.left:
                edge_records[-1].append(e)
            else:
                # this is the same parent, but not contiguous
                edge_records.append([e])
        else:
            # Parent changed: submit the finished group for polytomy
            # resolution - may require new nodes to be created.
            polytomy_func(edge_records, new_edgesets, nodes)
            edge_records = [[e]]
    if edge_records:
        # Flush the group that belongs to the final parent.
        polytomy_func(edge_records, new_edgesets, nodes)

    return msprime.load_tables(nodes=nodes, edgesets=new_edgesets, mutations=mutations)
Esempio n. 2
0
def make_EdgesetTable(left, right, parent, children):
    """
    Build an ``msprime.EdgesetTable`` from the given column data.

    ``children_length`` is filled with 2 for every entry of ``parent``, i.e.
    each row is declared to have exactly two children.

    :return: The populated edgeset table.
    """
    table = msprime.EdgesetTable()
    columns = dict(
        left=left,
        right=right,
        parent=parent,
        children=children,
        children_length=[2] * len(parent),
    )
    table.set_columns(**columns)
    return table
 def __init__(self, gc_interval):
     """
     Start with empty node/edgeset tables and zeroed bookkeeping fields.

     :param gc_interval: Garbage collection interval
     """
     # Garbage-collection settings.
     self.gc_interval, self.last_gc_time = gc_interval, 0.0
     # Fresh, empty msprime tables.
     self.__nodes = msprime.NodeTable()
     self.__edges = msprime.EdgesetTable()
     # All of the __time_* fields start at 0.0.
     self.__time_sorting = self.__time_appending = 0.0
     self.__time_simplifying = self.__time_prepping = 0.0
Esempio n. 4
0
def general_mutation_example():
    """
    Return a tree sequence built from an msprime simulation plus two
    hand-written sites, each carrying one mutation on node 0.

    The simulation uses 10 samples, recombination rate 1, length 10 and
    random seed 2. Sites sit at positions 0 and 1 with ancestral states
    "A" and "C"; their mutations have derived states "T" and "G".
    """
    simulated = msprime.simulate(10, recombination_rate=1, length=10, random_seed=2)
    node_table = msprime.NodeTable()
    edgeset_table = msprime.EdgesetTable()
    simulated.dump_tables(nodes=node_table, edgesets=edgeset_table)

    site_table = msprime.SiteTable()
    mutation_table = msprime.MutationTable()
    for position, ancestral in ((0, "A"), (1, "C")):
        site_table.add_row(position=position, ancestral_state=ancestral)
    for site, derived in ((0, "T"), (1, "G")):
        mutation_table.add_row(site=site, node=0, derived_state=derived)

    tables = dict(
        nodes=node_table,
        edgesets=edgeset_table,
        sites=site_table,
        mutations=mutation_table,
    )
    return msprime.load_tables(**tables)
Esempio n. 5
0
def node_name_example():
    """
    Return a simulated tree sequence whose nodes are named ``n_<id>``.

    A simulation (100 samples, recombination rate 0.1, length 10, seed 1) is
    dumped to tables; a second node table is then filled with the original
    flags and times plus a packed name column, and loaded together with the
    original edgesets and a single provenance string.
    """
    simulated = msprime.simulate(
        sample_size=100, recombination_rate=0.1, length=10, random_seed=1)
    original_nodes = msprime.NodeTable()
    edgeset_table = msprime.EdgesetTable()
    simulated.dump_tables(nodes=original_nodes, edgesets=edgeset_table)

    named_nodes = msprime.NodeTable()
    # One label per node id, packed into msprime's flat string encoding.
    labels = list(map("n_{}".format, range(simulated.num_nodes)))
    packed_names, name_lengths = msprime.pack_strings(labels)
    named_nodes.set_columns(
        name=packed_names,
        name_length=name_lengths,
        flags=original_nodes.flags,
        time=original_nodes.time,
    )
    return msprime.load_tables(
        nodes=named_nodes,
        edgesets=edgeset_table,
        provenance_strings=[b"sdf"],
    )
def write_vcf(chrom):
    """
    Add mutations to the tree sequence stored for ``chrom`` and write a VCF.

    Reads the tree file, VCF path, mutation rate and seed for ``chrom`` from
    the module-level ``args`` and ``seeds`` objects, generates mutations on
    the loaded tables, and writes the resulting tree sequence as a haploid
    (``ploidy=1``) VCF. Progress messages go to the module-level ``logfile``.

    :param chrom: Key used to index ``args.tree_file``, ``args.vcffile``,
        ``args.mut_rate`` and ``seeds``.
    :return: Always ``True``.
    """
    treefile = args.tree_file[chrom]
    vcf_path = args.vcffile[chrom]
    mut_rate = args.mut_rate[chrom]
    seed = seeds[chrom]
    logfile.write("Simulating mutations on " + treefile + "\n")
    ts = msprime.load(treefile)
    rng = msprime.RandomGenerator(seed)
    nodes = msprime.NodeTable()
    edgesets = msprime.EdgesetTable()
    sites = msprime.SiteTable()
    mutations = msprime.MutationTable()
    migrations = msprime.MigrationTable()
    ts.dump_tables(nodes=nodes, edgesets=edgesets, migrations=migrations)
    mutgen = msprime.MutationGenerator(rng, mut_rate)
    mutgen.generate(nodes, edgesets, sites, mutations)
    logfile.write("Saving to " + vcf_path + "\n")
    # NOTE(review): migrations are dumped above but not passed on here, as in
    # the original code - confirm that dropping them is intended.
    mutated_ts = msprime.load_tables(nodes=nodes,
                                     edgesets=edgesets,
                                     sites=sites,
                                     mutations=mutations)
    # Open the output only once there is something to write, and let the
    # context manager close (and flush) the handle even if writing fails.
    with open(vcf_path, "w") as vcf:
        mutated_ts.write_vcf(vcf, ploidy=1)

    return True
# Script section: record progress, optionally save the simplified tree
# sequence, then generate mutations on it and rebuild a tree sequence.
# `logfile`, `args` and `minimal_ts` are defined earlier in the script
# (not shown here).
logfile.write("Simplified; now writing to treefile (if specified).\n")
logfile.write(time.strftime('%X %x %Z') + "\n")
logfile.write("----------\n")
logfile.flush()

# Saving the simplified tree sequence is optional.
if args.treefile is not None:
    minimal_ts.dump(args.treefile)

# The mutation generator is seeded from the command-line seed, which is also
# written to the log.
mut_seed = args.seed
logfile.write("Generating mutations with seed " + str(mut_seed) + "\n")
logfile.flush()

# Dump nodes and edgesets into fresh tables; the empty site and mutation
# tables are passed to the mutation generator along with them.
rng = msprime.RandomGenerator(mut_seed)
nodes = msprime.NodeTable()
edgesets = msprime.EdgesetTable()
sites = msprime.SiteTable()
mutations = msprime.MutationTable()
minimal_ts.dump_tables(nodes=nodes, edgesets=edgesets)
mutgen = msprime.MutationGenerator(rng, args.mut_rate)
mutgen.generate(nodes, edgesets, sites, mutations)

# Debugging aid (disabled): print the tables to the log.
# print(nodes, file=logfile)
# print(edgesets, file=logfile)
# print(sites, file=logfile)
# print(mutations, file=logfile)

# Reassemble a tree sequence from the four tables.
mutated_ts = msprime.load_tables(nodes=nodes,
                                 edgesets=edgesets,
                                 sites=sites,
                                 mutations=mutations)
    # NOTE(review): this is the body of a function whose header is not
    # visible; `nodes`, `edges`, `samples` and `popsize` come from code not
    # shown (they are indexed by field name, so presumably NumPy structured
    # arrays - confirm).
    # The largest recorded generation must equal 10 * popsize.
    max_gen = nodes['generation'].max()
    assert (int(max_gen) == 10 * popsize)

    # Convert node times from forwards to backwards
    # (subtract the maximum, then negate, so the latest generation becomes 0).
    nodes['generation'] = nodes['generation'] - max_gen
    nodes['generation'] = nodes['generation'] * -1.0

    # Construct and populate msprime's tables
    # Every node gets flags == 1 (presumably msprime's "is sample" flag -
    # TODO confirm).
    flags = np.empty([len(nodes)], dtype=np.uint32)
    flags.fill(1)
    nt = msprime.NodeTable()
    nt.set_columns(flags=flags,
                   population=nodes['population'],
                   time=nodes['generation'])

    # Each edgeset row is given a children_length of 1, i.e. one child per row.
    es = msprime.EdgesetTable()
    es.set_columns(left=edges['left'],
                   right=edges['right'],
                   parent=edges['parent'],
                   children=edges['child'],
                   children_length=[1] * len(edges))

    # Sort
    msprime.sort_tables(nodes=nt, edgesets=es)

    # Simplify: this is where the magic happens
    msprime.simplify_tables(samples=samples.tolist(), nodes=nt, edgesets=es)

    # Create a tree sequence
    x = msprime.load_tables(nodes=nt, edgesets=es)
Esempio n. 9
0
 def __init__(self, gc_interval=None):
     """
     Start with empty node and edgeset tables.

     :param gc_interval: Stored as ``self.gc_interval``; defaults to None.
     """
     # Fresh, empty msprime tables.
     self.__nodes, self.__edges = msprime.NodeTable(), msprime.EdgesetTable()
     # Garbage-collection settings; the last GC time starts at 0.
     self.gc_interval, self.last_gc_time = gc_interval, 0