def write_vcf(chrom):
    """Add simulated mutations to a saved tree sequence and write it as VCF.

    Loads the tree sequence stored at ``args.tree_file[chrom]``, generates
    mutations at rate ``args.mut_rate[chrom]`` with a random generator seeded
    from ``seeds[chrom]``, and writes the mutated tree sequence as a VCF with
    ``ploidy=1`` to ``args.vcffile[chrom]``. Progress messages are written to
    the module-level ``logfile``.

    :param chrom: Key used to look up the tree file, VCF path, mutation rate
        and seed for one chromosome.
    :return: Always ``True``.
    """
    treefile = args.tree_file[chrom]
    vcf_path = args.vcffile[chrom]
    mut_rate = args.mut_rate[chrom]
    seed = seeds[chrom]

    logfile.write("Simulating mutations on " + treefile + "\n")
    ts = msprime.load(treefile)
    rng = msprime.RandomGenerator(seed)

    # Empty tables to receive the topology dumped from the loaded tree
    # sequence and the newly generated sites/mutations.
    nodes = msprime.NodeTable()
    edgesets = msprime.EdgesetTable()
    sites = msprime.SiteTable()
    mutations = msprime.MutationTable()
    migrations = msprime.MigrationTable()
    ts.dump_tables(nodes=nodes, edgesets=edgesets, migrations=migrations)

    mutgen = msprime.MutationGenerator(rng, mut_rate)
    mutgen.generate(nodes, edgesets, sites, mutations)

    logfile.write("Saving to " + vcf_path + "\n")
    mutated_ts = msprime.load_tables(nodes=nodes,
                                     edgesets=edgesets,
                                     sites=sites,
                                     mutations=mutations)
    # Open the output only once there is something to write, and make sure
    # the handle is flushed and closed even if writing fails.
    with open(vcf_path, "w") as vcf:
        mutated_ts.write_vcf(vcf, ploidy=1)

    return True
Beispiel #2
0
    def run(self, ngens):
        """Simulate ``ngens`` generations and return the resulting tables.

        The population holds ``self.N`` individuals. If ``self.deep_history``
        is true, the starting population is taken from
        ``msprime.simulate(self.N, recombination_rate=1.0)`` with its node
        times shifted back by ``ngens``; otherwise ``self.N`` fresh nodes are
        created at time ``ngens``.

        In each generation (time counts down from ``ngens - 1`` to ``0``) an
        individual is replaced when ``random.random() > self.survival``. Each
        replacement gets two parents drawn with ``random.choice`` from the
        population as it stood at the start of the generation, and a
        breakpoint from ``self.random_breakpoint()``: the left parent is
        recorded for ``[0, bp)`` and the right parent for ``[bp, 1.0)``.

        After the last generation, the nodes of the final population are
        flagged with ``msprime.NODE_IS_SAMPLE``; all other flags are 0.

        :param ngens: Number of generations to simulate.
        :return: ``msprime.TableCollection`` built from the node, edge,
            migration, site, mutation and provenance tables (the last four
            are left empty by this method).
        """
        nodes = msprime.NodeTable()
        edges = msprime.EdgeTable()
        migrations = msprime.MigrationTable()
        sites = msprime.SiteTable()
        mutations = msprime.MutationTable()
        provenances = msprime.ProvenanceTable()
        if self.deep_history:
            # initial population
            init_ts = msprime.simulate(self.N, recombination_rate=1.0)
            init_ts.dump_tables(nodes=nodes, edges=edges)
            # Push the pre-existing history back so it predates generation 0
            # of the forward simulation.
            nodes.set_columns(time=nodes.time + ngens, flags=nodes.flags)
        else:
            for _ in range(self.N):
                nodes.add_row(time=ngens)

        pop = list(range(self.N))
        for t in range(ngens - 1, -1, -1):
            if self.debug:
                print("t:", t)
                print("pop:", pop)

            dead = [random.random() > self.survival for _ in pop]
            num_dead = sum(dead)
            # Sample these first so that all parents are from the previous
            # generation (pop is overwritten in place below).
            new_parents = [(random.choice(pop), random.choice(pop))
                           for _ in range(num_dead)]
            parent_pairs = iter(new_parents)
            if self.debug:
                print("Replacing", num_dead, "individuals.")
            for j in range(self.N):
                if not dead[j]:
                    continue
                # The new node's ID is its row index in the node table.
                offspring = nodes.num_rows
                nodes.add_row(time=t)
                lparent, rparent = next(parent_pairs)
                bp = self.random_breakpoint()
                if self.debug:
                    print("--->", offspring, lparent, rparent, bp)
                pop[j] = offspring
                # Skip zero-length segments when the breakpoint falls on
                # either end of the [0, 1] interval.
                if bp > 0.0:
                    edges.add_row(left=0.0,
                                  right=bp,
                                  parent=lparent,
                                  child=offspring)
                if bp < 1.0:
                    edges.add_row(left=bp,
                                  right=1.0,
                                  parent=rparent,
                                  child=offspring)

        if self.debug:
            print("Done! Final pop:")
            print(pop)
        # Build the membership set once: testing against the list for every
        # node row would be O(N) per row.
        alive = set(pop)
        flags = [(msprime.NODE_IS_SAMPLE if u in alive else 0)
                 for u in range(nodes.num_rows)]
        nodes.set_columns(time=nodes.time, flags=flags)
        if self.debug:
            print("Done.")
            print("Nodes:")
            print(nodes)
            print("Edges:")
            print(edges)
        return msprime.TableCollection(nodes, edges, migrations, sites,
                                       mutations, provenances)
Beispiel #3
0
def _load_legacy_hdf5_v10(root, remove_duplicate_positions=False):
    """Load a tree sequence from the groups of a legacy v10 HDF5 file.

    Reads the ``nodes``, ``edges``, ``migrations``, ``sites``, ``mutations``
    and ``provenances`` groups from ``root`` into the corresponding msprime
    tables, appends a provenance row describing the upgrade, and builds the
    tree sequence with ``msprime.load_tables``.

    :param root: Mapping-like object indexed by group name; each group is in
        turn indexed by column name (presumably an open HDF5 file - confirm
        against the caller).
    :param remove_duplicate_positions: Ignored; duplicate positions cannot
        occur in v10 files.
    :return: The result of ``msprime.load_tables`` on the populated tables.
    """

    def optional_metadata(group):
        # Metadata columns are optional; pass None for both when absent.
        if "metadata" in group:
            return group["metadata"], group["metadata_offset"]
        return None, None

    def columns(group, *names):
        # Collect the named datasets of a group as set_columns() keywords.
        return {name: group[name] for name in names}

    # Nodes: the group's columns are read unconditionally.
    nodes_group = root["nodes"]
    node_meta, node_meta_offset = optional_metadata(nodes_group)
    nodes = msprime.NodeTable()
    nodes.set_columns(metadata=node_meta,
                      metadata_offset=node_meta_offset,
                      **columns(nodes_group, "flags", "population", "time"))

    # Edges: likewise read unconditionally.
    edges_group = root["edges"]
    edges = msprime.EdgeTable()
    edges.set_columns(
        **columns(edges_group, "left", "right", "parent", "child"))

    # Migrations: the table stays empty when the group has no "left" column.
    migrations_group = root["migrations"]
    migrations = msprime.MigrationTable()
    if "left" in migrations_group:
        migrations.set_columns(
            **columns(migrations_group,
                      "left", "right", "node", "source", "dest", "time"))

    # Sites: the table stays empty when the group has no "position" column.
    sites_group = root["sites"]
    sites = msprime.SiteTable()
    if "position" in sites_group:
        site_meta, site_meta_offset = optional_metadata(sites_group)
        sites.set_columns(
            metadata=site_meta,
            metadata_offset=site_meta_offset,
            **columns(sites_group,
                      "position", "ancestral_state",
                      "ancestral_state_offset"))

    # Mutations: the table stays empty when the group has no "site" column.
    mutations_group = root["mutations"]
    mutations = msprime.MutationTable()
    if "site" in mutations_group:
        mut_meta, mut_meta_offset = optional_metadata(mutations_group)
        mutations.set_columns(
            metadata=mut_meta,
            metadata_offset=mut_meta_offset,
            **columns(mutations_group,
                      "site", "node", "parent", "derived_state",
                      "derived_state_offset"))

    # Provenances: existing rows are copied when present; the upgrade record
    # is appended in every case.
    provenances_group = root["provenances"]
    provenances = msprime.ProvenanceTable()
    if "timestamp" in provenances_group:
        timestamp = provenances_group["timestamp"]
        timestamp_offset = provenances_group["timestamp_offset"]
        if "record" not in provenances_group:
            # No stored records: use placeholder data with all-zero offsets.
            record = np.empty_like(timestamp)
            record_offset = np.zeros_like(timestamp_offset)
        else:
            record = provenances_group["record"]
            record_offset = provenances_group["record_offset"]
        provenances.set_columns(timestamp=timestamp,
                                timestamp_offset=timestamp_offset,
                                record=record,
                                record_offset=record_offset)
    provenances.add_row(_get_upgrade_provenance(root))

    tables = dict(nodes=nodes,
                  edges=edges,
                  migrations=migrations,
                  sites=sites,
                  mutations=mutations,
                  provenances=provenances)
    return msprime.load_tables(**tables)