def resolve_polytomies(ts, polytomy_func):
    """
    Rebuild the edgesets of ``ts`` by passing each parent's edge records
    through ``polytomy_func`` and return the resulting tree sequence.

    The edgesets of ``ts`` are walked in order; records that share a parent
    are assumed to be adjacent. For each parent the records are split into
    blocks of contiguous intervals before being handed to ``polytomy_func``.

    :param ts: tree sequence whose ``edgesets()`` are iterated. Each record
        is read for its ``parent``, ``left`` and ``right`` attributes.
    :param polytomy_func: callable invoked as
        ``polytomy_func(edge_records, new_edgesets, nodes)``. ``edge_records``
        is a list of lists of edge records for a single parent (one inner
        list per contiguous block); ``new_edgesets`` and ``nodes`` are the
        tables it should add to (it may need to create new nodes).
    :return: the result of ``msprime.load_tables`` on the nodes, the new
        edgesets, and the mutations from ``get_nodes_and_mutations(ts)``.
    """
    new_edgesets = msprime.EdgesetTable()
    nodes, mutations = get_nodes_and_mutations(ts)
    # Edge records for the current parent, split into contiguous blocks.
    # Starts empty so that nothing is submitted when ``ts`` has no edgesets.
    edge_records = []
    for e in ts.edgesets():  # assume records are in order
        if edge_records and e.parent != edge_records[0][0].parent:
            # Parent changed: submit the finished parent's records for
            # polytomy resolution - may require new nodes to be created.
            polytomy_func(edge_records, new_edgesets, nodes)
            edge_records = []
        if not edge_records:
            # First record seen for this parent opens the first block.
            edge_records.append([e])
            continue
        last = edge_records[-1][-1]
        # Two intervals are contiguous when they share an end point. Both
        # orientations are accepted so the check works whether a parent's
        # records run in increasing or decreasing position.
        if e.left == last.right or e.right == last.left:
            edge_records[-1].append(e)
        else:
            # Same parent, but not contiguous: start a new block.
            edge_records.append([e])
    if edge_records:
        # Flush the records of the final parent.
        polytomy_func(edge_records, new_edgesets, nodes)
    return msprime.load_tables(
        nodes=nodes, edgesets=new_edgesets, mutations=mutations)
def make_EdgesetTable(left, right, parent, children):
    """
    Build an ``msprime.EdgesetTable`` from column data.

    :param left: values for the ``left`` column.
    :param right: values for the ``right`` column.
    :param parent: values for the ``parent`` column; its length determines
        how many ``children_length`` entries are generated.
    :param children: flattened values for the ``children`` column.
    :return: the populated ``msprime.EdgesetTable``.
    """
    table = msprime.EdgesetTable()
    # Every row is declared to own exactly two entries of ``children``.
    children_per_row = [2] * len(parent)
    table.set_columns(
        left=left,
        right=right,
        parent=parent,
        children=children,
        children_length=children_per_row,
    )
    return table
def __init__(self, gc_interval):
    """
    Initialise empty tables and zeroed bookkeeping attributes.

    :param gc_interval: Garbage collection interval
    """
    # Garbage-collection bookkeeping.
    self.gc_interval = gc_interval
    self.last_gc_time = 0.0

    # Fresh, empty msprime tables.
    self.__nodes = msprime.NodeTable()
    self.__edges = msprime.EdgesetTable()

    # Per-phase accumulators, all starting at zero
    # (presumably elapsed-time totals - confirm against the methods that
    # update them).
    self.__time_sorting = self.__time_appending = 0.0
    self.__time_simplifying = self.__time_prepping = 0.0
def general_mutation_example():
    """
    Return a tree sequence from a fixed-seed simulation, decorated with two
    hand-written sites that each carry one mutation on node 0.
    """
    ts = msprime.simulate(10, recombination_rate=1, length=10, random_seed=2)
    nodes = msprime.NodeTable()
    edgesets = msprime.EdgesetTable()
    ts.dump_tables(nodes=nodes, edgesets=edgesets)

    sites = msprime.SiteTable()
    mutations = msprime.MutationTable()
    # One entry per site: (position, ancestral_state, derived_state).
    site_specs = ((0, "A", "T"), (1, "C", "G"))
    for position, ancestral, _ in site_specs:
        sites.add_row(position=position, ancestral_state=ancestral)
    # Mutations reference sites by row index, in insertion order.
    for site_id, (_, _, derived) in enumerate(site_specs):
        mutations.add_row(site=site_id, node=0, derived_state=derived)

    return msprime.load_tables(
        nodes=nodes, edgesets=edgesets, sites=sites, mutations=mutations)
def node_name_example():
    """
    Return a tree sequence from a fixed-seed simulation in which every node
    is named ``n_<id>``, loaded with a single provenance string.
    """
    ts = msprime.simulate(
        sample_size=100, recombination_rate=0.1, length=10, random_seed=1)
    nodes = msprime.NodeTable()
    edgesets = msprime.EdgesetTable()
    ts.dump_tables(nodes=nodes, edgesets=edgesets)

    renamed_nodes = msprime.NodeTable()
    # One label per node id, in id order.
    labels = []
    for node_id in range(ts.num_nodes):
        labels.append("n_" + str(node_id))
    packed_names, name_lengths = msprime.pack_strings(labels)
    # Flags and times are copied from the dumped node table; only the names
    # are new.
    renamed_nodes.set_columns(
        name=packed_names,
        name_length=name_lengths,
        flags=nodes.flags,
        time=nodes.time,
    )
    return msprime.load_tables(
        nodes=renamed_nodes, edgesets=edgesets, provenance_strings=[b"sdf"])
def write_vcf(chrom):
    """
    Simulate mutations on the tree sequence stored for ``chrom`` and write
    the mutated tree sequence out as a VCF file.

    Reads the module-level ``args`` (``tree_file``, ``vcffile`` and
    ``mut_rate``, each indexed by ``chrom``), ``seeds`` and ``logfile``.

    :param chrom: key/index used to look up the tree file, VCF path,
        mutation rate and random seed for this run.
    :return: ``True`` once the VCF has been written.
    """
    treefile = args.tree_file[chrom]
    # The output is opened before any simulation work, as before, so an
    # unwritable path fails early. The ``with`` block guarantees the handle
    # is flushed and closed even if a later step raises.
    with open(args.vcffile[chrom], "w") as vcf:
        mut_rate = args.mut_rate[chrom]
        seed = seeds[chrom]
        logfile.write("Simulating mutations on" + treefile + "\n")
        ts = msprime.load(treefile)
        rng = msprime.RandomGenerator(seed)

        nodes = msprime.NodeTable()
        edgesets = msprime.EdgesetTable()
        sites = msprime.SiteTable()
        mutations = msprime.MutationTable()
        migrations = msprime.MigrationTable()
        ts.dump_tables(nodes=nodes, edgesets=edgesets, migrations=migrations)

        # Fill the (initially empty) site and mutation tables.
        mutgen = msprime.MutationGenerator(rng, mut_rate)
        mutgen.generate(nodes, edgesets, sites, mutations)

        logfile.write("Saving to" + args.vcffile[chrom] + "\n")
        # NOTE: the dumped migrations are not passed on to load_tables.
        mutated_ts = msprime.load_tables(
            nodes=nodes, edgesets=edgesets, sites=sites, mutations=mutations)
        mutated_ts.write_vcf(vcf, ploidy=1)
    return True
# Record that simplification has finished, with a timestamp, and flush so
# the log is current before the slower steps below.
logfile.write("Simplified; now writing to treefile (if specified).\n")
logfile.write(time.strftime('%X %x %Z') + "\n")
logfile.write("----------\n")
logfile.flush()

# Dumping the simplified tree sequence is optional.
if args.treefile is not None:
    minimal_ts.dump(args.treefile)

# The mutation generator is seeded from the command-line seed.
mut_seed = args.seed
logfile.write("Generating mutations with seed " + str(mut_seed) + "\n")
logfile.flush()

rng = msprime.RandomGenerator(mut_seed)
nodes = msprime.NodeTable()
edgesets = msprime.EdgesetTable()
sites = msprime.SiteTable()
mutations = msprime.MutationTable()
# Only nodes and edgesets are copied out of the simplified tree sequence;
# sites and mutations start empty and are filled by the generator.
minimal_ts.dump_tables(nodes=nodes, edgesets=edgesets)
mutgen = msprime.MutationGenerator(rng, args.mut_rate)
mutgen.generate(nodes, edgesets, sites, mutations)

# Debugging aid: uncomment to dump the tables to the log.
# print(nodes, file=logfile)
# print(edgesets, file=logfile)
# print(sites, file=logfile)
# print(mutations, file=logfile)

# Assemble a new tree sequence from the tables, now including mutations.
mutated_ts = msprime.load_tables(
    nodes=nodes, edgesets=edgesets, sites=sites, mutations=mutations)
# Sanity check: the latest generation recorded is expected to be exactly
# 10 * popsize.
max_gen = nodes['generation'].max()
assert (int(max_gen) == 10 * popsize)

# Convert node times from forwards to backwards
# (net effect: generation -> max_gen - generation, so the latest
# generation ends up at time 0).
nodes['generation'] = nodes['generation'] - max_gen
nodes['generation'] = nodes['generation'] * -1.0

# Construct and populate msprime's tables
# Every node gets flag value 1 (presumably msprime's "is sample" flag -
# confirm against the msprime node flag constants).
flags = np.empty([len(nodes)], dtype=np.uint32)
flags.fill(1)
nt = msprime.NodeTable()
nt.set_columns(flags=flags, population=nodes['population'],
               time=nodes['generation'])

# Each row of ``edges`` contributes exactly one child, hence a
# children_length of 1 per row.
es = msprime.EdgesetTable()
es.set_columns(left=edges['left'], right=edges['right'],
               parent=edges['parent'], children=edges['child'],
               children_length=[1] * len(edges))

# Sort
msprime.sort_tables(nodes=nt, edgesets=es)

# Simplify: this is where the magic happens
# (operates on ``nt`` and ``es``; no return value is used here)
msprime.simplify_tables(samples=samples.tolist(), nodes=nt, edgesets=es)

# Create a tree sequence
x = msprime.load_tables(nodes=nt, edgesets=es)
def __init__(self, gc_interval=None):
    """
    Store the garbage-collection interval and start with empty tables.

    :param gc_interval: garbage collection interval; defaults to ``None``.
    """
    # Garbage-collection bookkeeping.
    self.gc_interval = gc_interval
    self.last_gc_time = 0

    # Fresh, empty msprime tables.
    self.__nodes = msprime.NodeTable()
    self.__edges = msprime.EdgesetTable()