Esempio n. 1
0
    def load_tables(cls, tables, reference_time=0.0):
        '''
        Build a tree sequence from ``tables`` after recoding allelic states as
        integer labels and shifting times by ``reference_time``.

        The columns of ``tables`` are overwritten through ``set_columns``
        before the tree sequence is built:

        - every site's ancestral state becomes ``b'0'``;
        - every mutation's derived state becomes the decimal index (as UTF-8
          bytes) of its original state in that site's allele list, which
          starts with the original ancestral state and grows in order of
          first appearance among the mutations;
        - ``reference_time`` is subtracted from node and migration times.

        :param cls: Not used by this method's body.
        :param tables: Table collection to recode; its site, mutation, node
            and migration tables are rewritten.
        :param float reference_time: Amount subtracted from every node and
            migration time.
        :return: The result of ``tables.tree_sequence()``, with two extra
            attributes set on it: ``reference_time`` and ``alleles`` (one
            list of original allelic states per site).
        '''
        # One allele list per site, seeded with that site's ancestral state.
        alleles = [[ancestral] for ancestral in msprime.unpack_bytes(
            tables.sites.ancestral_state, tables.sites.ancestral_state_offset)]
        derived_state = msprime.unpack_bytes(
            tables.mutations.derived_state,
            tables.mutations.derived_state_offset)
        new_ancestral_state = [b'0'] * tables.sites.num_rows

        # Read the site column once instead of once per mutation: fetching a
        # table column on every iteration repeats work proportional to the
        # table size for each row.
        mutation_site = tables.mutations.site
        new_derived_state = []
        for site, state in zip(mutation_site, derived_state):
            site_alleles = alleles[site]
            try:
                allele_index = site_alleles.index(state)
            except ValueError:
                # First time this state is seen at this site: register it.
                allele_index = len(site_alleles)
                site_alleles.append(state)
            new_derived_state.append(str(allele_index).encode('utf-8'))

        # reset sites and mutations
        new_ds_column, new_ds_offset = msprime.pack_bytes(new_derived_state)
        tables.mutations.set_columns(
            site=tables.mutations.site,
            node=tables.mutations.node,
            derived_state=new_ds_column,
            derived_state_offset=new_ds_offset,
            parent=tables.mutations.parent,
            metadata=tables.mutations.metadata,
            metadata_offset=tables.mutations.metadata_offset)
        new_as_column, new_as_offset = msprime.pack_bytes(new_ancestral_state)
        tables.sites.set_columns(position=tables.sites.position,
                                 ancestral_state=new_as_column,
                                 ancestral_state_offset=new_as_offset,
                                 metadata=tables.sites.metadata,
                                 metadata_offset=tables.sites.metadata_offset)

        # reset time
        tables.nodes.set_columns(flags=tables.nodes.flags,
                                 time=tables.nodes.time - reference_time,
                                 population=tables.nodes.population,
                                 individual=tables.nodes.individual,
                                 metadata=tables.nodes.metadata,
                                 metadata_offset=tables.nodes.metadata_offset)
        tables.migrations.set_columns(left=tables.migrations.left,
                                      right=tables.migrations.right,
                                      node=tables.migrations.node,
                                      source=tables.migrations.source,
                                      dest=tables.migrations.dest,
                                      time=tables.migrations.time -
                                      reference_time)

        ts = tables.tree_sequence()
        # Keep what is needed to map the recoded values back to the originals.
        ts.reference_time = reference_time
        ts.alleles = alleles

        return ts
Esempio n. 2
0
def extract_mutation_metadata(tables):
    '''
    Lazily decode the metadata of every row of the mutation table, in table
    order.

    Each yielded item is whatever ``decode_mutation`` returns for one
    mutation's packed metadata (per the original documentation, a list of
    :class:`MutationMetadata` objects).

    :param TableCollection tables: The tables, as produced by SLiM.
    '''
    packed = tables.mutations.metadata
    offsets = tables.mutations.metadata_offset
    # unpack_bytes splits the flat column into one bytes entry per mutation.
    yield from map(decode_mutation, msprime.unpack_bytes(packed, offsets))
Esempio n. 3
0
    def test_annotate_nodes(self):
        # For every example: node metadata must survive a decode/encode round
        # trip byte-for-byte, and writing the decoded metadata back onto a
        # second set of tables must leave them equal to the first.
        for ts in self.get_slim_examples():
            reference = ts.tables
            annotated = ts.tables
            packed = msprime.unpack_bytes(reference.nodes.metadata,
                                          reference.nodes.metadata_offset)
            decoded = []
            for raw in packed:
                node_md = pyslim.decode_node(raw)
                self.assertEqual(raw, pyslim.encode_node(node_md))
                decoded.append(node_md)

            pyslim.annotate_node_metadata(annotated, decoded)
            self.assertEqual(reference, annotated)
Esempio n. 4
0
import pyslim
import msprime

# Load the example tree sequence and display its tables.
ts = pyslim.load("simple.trees")
tables = ts.tables
print(tables)

# mutations: check that decoding then re-encoding each metadata entry gives
# back the original bytes, then write the decoded metadata to the tables.

packed_mutations = msprime.unpack_bytes(tables.mutations.metadata,
                                        tables.mutations.metadata_offset)
mut_metadata = []
for raw in packed_mutations:
    decoded = pyslim.decode_mutation(raw)
    assert raw == pyslim.encode_mutation(decoded)
    mut_metadata.append(decoded)

pyslim.annotate_mutations(tables, mut_metadata)

# nodes: same round-trip check and write-back for the node metadata.

packed_nodes = msprime.unpack_bytes(tables.nodes.metadata,
                                    tables.nodes.metadata_offset)
node_metadata = []
for raw in packed_nodes:
    decoded = pyslim.decode_node(raw)
    assert raw == pyslim.encode_node(decoded)
    node_metadata.append(decoded)

pyslim.annotate_nodes(tables, node_metadata)
def _broadcast_per_population(value, num_pops, name):
    '''
    Return ``value`` as a per-population sequence of ``num_pops`` entries.

    A real scalar (``int``, ``float``, or any type registered as
    ``numbers.Real``) is repeated ``num_pops`` times; anything else is
    assumed to already be a sequence and must have length ``num_pops``.

    :param value: Scalar or sequence supplied by the caller.
    :param int num_pops: Required number of entries.
    :param str name: Parameter name, used only in the assertion message.
    :raises AssertionError: If a sequence of the wrong length is given.
    '''
    # Local import: the file's top-level import block is not visible here.
    import numbers

    if isinstance(value, numbers.Real):
        value = [value for _ in range(num_pops)]
    assert len(value) == num_pops, (
        "%s must have one entry per population" % name)
    return value


def _set_populations(tables,
                     pop_id=None,
                     selfing_fraction=0.0,
                     female_cloning_fraction=0.0,
                     male_cloning_fraction=0.0,
                     sex_ratio=0.5,
                     bounds_x0=0.0,
                     bounds_x1=0.0,
                     bounds_y0=0.0,
                     bounds_y1=0.0,
                     bounds_z0=0.0,
                     bounds_z1=0.0,
                     migration_records=None):
    '''
    Adds to a TableCollection the information about populations required for SLiM
    to load a tree sequence. This will replace anything already in the Population
    table.

    The number of populations is one more than the largest value in the node
    table's ``population`` column. Every individual's decoded metadata must
    refer to a population below that number.

    :param TableCollection tables: The tables to annotate; node and
        individual tables are read, and population metadata is written via
        ``annotate_population_metadata``.
    :param pop_id: One id per population; defaults to
        ``0, 1, ..., num_pops - 1``.
    :param selfing_fraction: Scalar applied to every population, or one
        value per population. The same convention holds for
        ``female_cloning_fraction``, ``male_cloning_fraction``,
        ``sex_ratio`` and the six ``bounds_*`` arguments.
    :param migration_records: One list of
        :class:`PopulationMigrationMetadata` per population; defaults to an
        empty list for each.
    :raises ValueError: If an individual's metadata cannot be decoded.
    :raises AssertionError: If any per-population argument has the wrong
        length, an individual refers to a population that is out of range,
        or a migration record has the wrong type. These checks are kept as
        assertions so that the exception type seen by callers is unchanged.
    '''
    num_pops = max(tables.nodes.population) + 1
    for md in msprime.unpack_bytes(tables.individuals.metadata,
                                   tables.individuals.metadata_offset):
        try:
            ind_md = decode_individual(md)
        except Exception as err:
            # Only ordinary errors are converted; KeyboardInterrupt and
            # SystemExit propagate untouched.
            raise ValueError("Individuals do not have metadata:" +
                             "need to run set_nodes_individuals() first?"
                             ) from err
        assert (ind_md.population < num_pops)
    if pop_id is None:
        pop_id = list(range(num_pops))
    assert (len(pop_id) == num_pops)

    # Order matters: it is the positional order in which the values are
    # handed to PopulationMetadata, between pop_id and migration_records.
    per_population = [
        _broadcast_per_population(value, num_pops, name)
        for name, value in (
            ("selfing_fraction", selfing_fraction),
            ("female_cloning_fraction", female_cloning_fraction),
            ("male_cloning_fraction", male_cloning_fraction),
            ("sex_ratio", sex_ratio),
            ("bounds_x0", bounds_x0),
            ("bounds_x1", bounds_x1),
            ("bounds_y0", bounds_y0),
            ("bounds_y1", bounds_y1),
            ("bounds_z0", bounds_z0),
            ("bounds_z1", bounds_z1),
        )
    ]

    if migration_records is None:
        migration_records = [[] for _ in range(num_pops)]
    assert (len(migration_records) == num_pops)
    for mrl in migration_records:
        for mr in mrl:
            assert isinstance(mr, PopulationMigrationMetadata)

    population_metadata = [
        PopulationMetadata(*x)
        for x in zip(pop_id, *per_population, migration_records)
    ]
    annotate_population_metadata(tables, population_metadata)