Python Demographyの例、msprime.Demography Pythonの例

コード例 #1

0

ファイルを表示

ファイル: demographic_models.py プロジェクト: noscode/stdpopsim

def _mallard_black_split():
    """
    Build the North American mallard / black duck split-with-migration model.

    The msprime demography holds three populations (Mallard, Black_duck and
    Ancestral, added in that order), symmetric migration between the two
    derived populations, and one split event. The result is wrapped in a
    ``stdpopsim.DemographicModel`` carrying the citation, generation time
    and mutation rate.
    """
    model_id = "MallardBlackDuck_2L19"
    summary = "North American Mallard/Black Duck split"
    details = """
        This is a model fit to contemporary samples of wild North American
        mallard and black duck, using the "split-migration" model of dadi.
        See Figure 6 of Lavretsky et al 2019.
    """
    years_per_generation = 4
    # The split time is expressed in generations, not years.
    split_time = 632305 / years_per_generation

    mallard_size = 1.37e6
    black_duck_size = 1.57e6
    # Personal communication from Joshua Brown, 13 Apr 2021:
    # "Based on the contemporary dataset, the ancestral population size
    # for the Black duck/Mallard dadi model was 819535."
    ancestral_size = 819535

    # The migration rate is reported as 2.82 in each direction. According to
    # the dadi manual, m12 is "the fraction of individuals each generation in
    # pop 1 that are new migrants from pop 2, times the 2Nref". Dividing by
    # 2 * ancestral_size converts it back to a per-generation fraction.
    migration_rate = 2.82 / (2 * ancestral_size)

    demography = msprime.Demography()
    population_specs = (
        ("Mallard", mallard_size, "Wild North American mallards"),
        ("Black_duck", black_duck_size, "Wild black ducks"),
        ("Ancestral", ancestral_size, "Ancestral population"),
    )
    for pop_name, pop_size, pop_description in population_specs:
        demography.add_population(
            initial_size=pop_size,
            name=pop_name,
            description=pop_description,
        )

    derived_populations = ["Mallard", "Black_duck"]
    demography.set_symmetric_migration_rate(
        populations=derived_populations, rate=migration_rate
    )
    demography.add_population_split(
        time=split_time, derived=derived_populations, ancestral="Ancestral"
    )

    citation = stdpopsim.Citation(
        author="Lavretsky et al.",
        year=2019,
        doi="https://doi.org/10.1111/mec.15343",
        reasons={stdpopsim.CiteReason.DEM_MODEL},
    )
    return stdpopsim.DemographicModel(
        id=model_id,
        description=summary,
        long_description=details,
        citations=[citation],
        generation_time=years_per_generation,
        model=demography,
        mutation_rate=4.83e-9,
    )

コード例 #2

0

ファイルを表示

    def __init__(self, NA, N1, N2, T, M12, M21):
        """
        Set up a generic two-population isolation-with-migration model.

        :param NA: Size of the ancestral population.
        :param N1: Size of the population named "pop1".
        :param N2: Size of the population named "pop2".
        :param T: Time of the split, passed to ``add_population_split``.
        :param M12: Rate passed to msprime with source "pop1" and dest "pop2".
        :param M21: Rate passed to msprime with source "pop2" and dest "pop1".
        """
        long_description = """
            A generic isolation with migration model where a single ancestral
            population of size NA splits into two populations of constant size N1
            and N2 time T generations ago, with migration rates M12 and M21 between
            the split populations.
            """

        demography = msprime.Demography()
        for pop_name, pop_size in (("pop1", N1), ("pop2", N2), ("ancestral", NA)):
            demography.add_population(initial_size=pop_size, name=pop_name)

        # FIXME: msprime interprets these rates BACKWARDS in time, so forwards
        # in time they apply in the opposite direction. This should be
        # explained in the documentation (and the rates probably swapped).
        # It is also unclear whether this model is still worth keeping, since
        # demes/msprime can express the same thing directly.
        for source, dest, rate in (("pop1", "pop2", M12), ("pop2", "pop1", M21)):
            demography.set_migration_rate(source=source, dest=dest, rate=rate)

        demography.add_population_split(
            time=T, ancestral="ancestral", derived=["pop1", "pop2"]
        )

        super().__init__(
            id="IsolationWithMigration",
            description="Generic IM model",
            long_description=long_description,
            model=demography,
            generation_time=1,
        )

コード例 #3

0

ファイルを表示

ファイル: test_compression.py プロジェクト: tskit-dev/tszip

 def test_all_fields(self):
     """
     Round-trip a tree sequence that has metadata on (almost) every table.

     A two-population split is simulated with migrations recorded and
     mutations added; JSON metadata is then attached to every table except
     "provenances" and "edges", and to the table collection itself, before
     the result is handed to ``self.verify``.
     """
     demography = msprime.Demography()
     for pop_name, pop_size in (("A", 10_000), ("B", 5_000), ("C", 1_000)):
         demography.add_population(name=pop_name, initial_size=pop_size)
     demography.add_population_split(time=1000, derived=["A", "B"], ancestral="C")

     ancestry = msprime.sim_ancestry(
         samples={"A": 1, "B": 1},
         demography=demography,
         random_seed=42,
         record_migrations=True,
     )
     mutated = msprime.sim_mutations(ancestry, rate=1, random_seed=42)
     tables = mutated.dump_tables()

     for name, table in tables.table_name_map.items():
         # These two tables are left without metadata.
         if name in ["provenances", "edges"]:
             continue
         table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
         metadatas = [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))]
         metadata, metadata_offset = tskit.pack_strings(metadatas)
         # Rewrite every column, overriding only the two metadata columns.
         columns = table.asdict()
         columns["metadata"] = metadata
         columns["metadata_offset"] = metadata_offset
         table.set_columns(**columns)

     tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
     tables.metadata = "Test metadata"
     self.verify(tables.tree_sequence())

コード例 #4

0

ファイルを表示

ファイル: data_gen_np.py プロジェクト: Genomics-HSE/DeepModels2

    def __init__(
        self,
        recombination_rate: float = RHO_HUMAN,
        mutation_rate: float = MU_HUMAN,
        demographic_events: list = None,
        population: int = None,
        number_intervals: int = N,
        splitter=simple_split,  # must be annotated
        num_replicates: int = 1,
        lengt: int = L_HUMAN,
        model: str = "hudson",
        random_seed: int = 42,
        sample_size: int = 1,
    ):
        """
        Store the simulation settings and make sure a demography is available.

        Parameters
        ----------
        recombination_rate, mutation_rate, num_replicates, model, random_seed,
        sample_size, number_intervals, splitter
            Stored unchanged on the instance under the same attribute names.
        demographic_events
            Demography to use. When falsy, a single-population
            ``msprime.Demography`` (population "A") is built from
            ``population``.
        population
            Initial size of population "A"; only consulted when
            ``demographic_events`` is falsy.
        lengt
            Stored on the instance as ``self.len`` (the parameter name is part
            of the public signature and is therefore kept as is).

        Raises
        ------
        ValueError
            If neither ``demographic_events`` nor ``population`` is given.
        """
        self.sample_size = sample_size
        self.recombination_rate = recombination_rate
        self.mutation_rate = mutation_rate
        self.num_replicates = num_replicates
        if not demographic_events:
            if not population:
                # ValueError (not bare BaseException) so that ordinary
                # ``except Exception`` handlers see bad arguments.
                raise ValueError(
                    "Either demographic_events or population must be specified"
                )
            demographic_events = msprime.Demography()
            demographic_events.add_population(name="A",
                                              initial_size=population)
        self.demographic_events = demographic_events
        self.splitter = splitter
        self.model = model
        self.len = lengt
        self.random_seed = random_seed
        self.number_intervals = number_intervals
        # Presumably a lazily-filled cache of generated data; it is only
        # initialised here — TODO confirm against the rest of the class.
        self._data = None

コード例 #5

0

ファイルを表示

def full_ts():
    """
    Build a tree sequence in which every table and field carries data.

    Duplicated from tskit's conftest.py, because other test suites that use
    this file will not have that fixture defined.
    """
    demography = msprime.Demography()
    for pop_name in ("A", "B", "C"):
        demography.add_population(initial_size=100, name=pop_name)
    demography.add_population_split(time=10, ancestral="C", derived=["A", "B"])

    ts = msprime.sim_ancestry(
        {"A": 5, "B": 5},
        demography=demography,
        random_seed=1,
        sequence_length=10,
        record_migrations=True,
    )
    # Sanity checks: the simulation must have produced the rows we rely on.
    assert ts.num_migrations > 0
    assert ts.num_individuals > 0
    ts = msprime.sim_mutations(ts, rate=0.1, random_seed=2)
    assert ts.num_mutations > 0

    tables = ts.dump_tables()

    # Replace the individuals with rows that have a location and parents.
    tables.individuals.clear()
    for ind in ts.individuals():
        tables.individuals.add_row(
            flags=0, location=[ind.id, ind.id], parents=[-1, -1]
        )

    # Attach per-row metadata to every table except provenances.
    for name, table in tables.table_name_map.items():
        if name == "provenances":
            continue
        table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        metadatas = [f"n_{name}_{u}" for u in range(len(table))]
        metadata, metadata_offset = tskit.pack_strings(metadatas)
        columns = table.asdict()
        columns["metadata"] = metadata
        columns["metadata_offset"] = metadata_offset
        table.set_columns(**columns)

    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = {"A": "Test metadata"}

    reference = tables.reference_sequence
    reference.data = "A" * int(tables.sequence_length)
    reference.url = "https://example.com/sequence"
    reference.metadata_schema = tskit.MetadataSchema.permissive_json()
    reference.metadata = {"A": "Test metadata"}

    # Extra provenance rows so the offset deletion test has enough to work on.
    for j in range(10):
        tables.provenances.add_row(timestamp="x" * j, record="y" * j)

    return tables.tree_sequence()

コード例 #6

0

ファイルを表示

ファイル: AnaPla.py プロジェクト: noscode/stdpopsim

def LavertskyEtAl2019TwoPop():
    """
    Independent (QC) implementation of the mallard / black duck model.

    Parameters come from Fig. 6 of Lavretsky et al. (2019), analysis of
    contemporary samples. Returns a ``stdpopsim.DemographicModel`` whose
    description fields are all set to the model id.
    """
    model_id = "QC-MallardBlackDuck_2L19"

    # 4 years per generation (page 7, section 2.6 of the paper).
    generation_time = 4
    # Estimated effective sizes of the two contemporary populations.
    mallard_ne = 1.37e6
    black_duck_ne = 1.57e6
    # Ancestral size: not reported in the paper, but reported to Peter Ralph
    # in personal communication.
    ancestral_ne = 819535
    # Divergence time.
    divergence_time = 632305 / generation_time

    # Symmetric migration. The reported rates are numbers of migrants per
    # generation scaled by the ancestral Ne, so m_ij = M_ij / (2 * N_anc),
    # where m_ij is the simulated rate and M_ij the rate given in the paper.
    rate_mallard_to_black = 2.82 / (2 * ancestral_ne)
    rate_black_to_mallard = rate_mallard_to_black

    demography = msprime.Demography()
    for label, size in (
        ("Mallard", mallard_ne),
        ("Black_duck", black_duck_ne),
        ("Ancestral", ancestral_ne),
    ):
        demography.add_population(name=label, description=label, initial_size=size)

    demography.add_population_split(
        time=divergence_time,
        derived=["Mallard", "Black_duck"],
        ancestral="Ancestral",
    )
    demography.set_migration_rate(
        source="Mallard", dest="Black_duck", rate=rate_mallard_to_black
    )
    demography.set_migration_rate(
        source="Black_duck", dest="Mallard", rate=rate_black_to_mallard
    )

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=generation_time,
        mutation_rate=4.83e-9,
        model=demography,
    )

コード例 #7

0

ファイルを表示

def ts_fixture():
    """
    Build a tree sequence that has data in all fields.

    Five populations are declared (only A, B and C take part in the split),
    ancestry and mutations are simulated with fixed seeds, and then
    individual locations/parents, per-row JSON metadata, top-level metadata
    and extra provenance rows are filled in.
    """
    demography = msprime.Demography()
    population_sizes = (
        ("A", 10_000),
        ("B", 5_000),
        ("C", 1_000),
        ("D", 500),
        ("E", 100),
    )
    for pop_name, pop_size in population_sizes:
        demography.add_population(name=pop_name, initial_size=pop_size)
    demography.add_population_split(time=1000, derived=["A", "B"], ancestral="C")

    ancestry = msprime.sim_ancestry(
        samples={"A": 10, "B": 10},
        demography=demography,
        sequence_length=5,
        random_seed=42,
        record_migrations=True,
        record_provenance=True,
    )
    mutated = msprime.sim_mutations(ancestry, rate=0.001, random_seed=42)
    tables = mutated.dump_tables()

    # Give every individual a location and a pair of parents.
    original_individuals = tables.individuals.copy()
    tables.individuals.clear()
    for idx, row in enumerate(original_individuals):
        updated_row = row.replace(
            location=[idx, idx + 1], parents=[idx - 1, idx - 1]
        )
        tables.individuals.append(updated_row)

    # Per-row JSON metadata on every table except provenances.
    for name, table in tables.name_map.items():
        if name == "provenances":
            continue
        table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        metadatas = [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))]
        metadata, metadata_offset = tskit.pack_strings(metadatas)
        columns = table.asdict()
        columns["metadata"] = metadata
        columns["metadata_offset"] = metadata_offset
        table.set_columns(**columns)

    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"

    # A few more provenance rows so there are enough for testing.
    for _ in range(3):
        tables.provenances.add_row(record="A")

    return tables.tree_sequence()

コード例 #8

0

ファイルを表示

ファイル: data_gen_np.py プロジェクト: Genomics-HSE/DeepModels2

def generate_demographic_events(
        population: int = None) -> 'msprime.Demography':
    """
    Create a single-population demography with random size changes.

    :param population: Initial size of population "A". When falsy, a size is
        obtained from ``give_population_size()``.
    :return: The demography, with one population-parameters change per
        randomly drawn event time.
    """
    if not population:
        population = give_population_size()

    demography = msprime.Demography()
    demography.add_population(name="A", initial_size=population)

    # Draw how many events there are, then their (sorted) times.
    event_count = np.random.randint(*NUMBER_OF_EVENTS_LIMITS)
    event_times = list(np.random.exponential(LAMBDA_EXP, size=event_count))
    event_times.sort()

    current_size = population
    for event_time in event_times:
        # Scale the previous size by a random factor, bounded from below.
        scaled_size = current_size * np.random.uniform(*POPULATION_COEFF_LIMITS)
        current_size = max(scaled_size, MIN_POPULATION_NUM)
        demography.add_population_parameters_change(
            event_time, initial_size=current_size)

    return demography

コード例 #9

0

ファイルを表示

    def test_mixed_old_and_new_style(self):
        """
        Passing a Demography together with any old-style argument must fail.

        Each of the three legacy arguments is supplied on its own alongside
        ``demography``; ``msprime.demography_factory`` has to raise
        ``ValueError`` every time.
        """
        new_style = msprime.Demography()

        def call_factory(**legacy):
            # Arguments that are not supplied are forwarded as None.
            msprime.demography_factory(
                Ne=1,
                demography=new_style,
                population_configurations=legacy.get("population_configurations"),
                migration_matrix=legacy.get("migration_matrix"),
                demographic_events=legacy.get("demographic_events"),
            )

        legacy_cases = (
            {"population_configurations": []},
            {"migration_matrix": [[]]},
            {"demographic_events": []},
        )
        for legacy_kwargs in legacy_cases:
            with self.assertRaises(ValueError):
                call_factory(**legacy_kwargs)

コード例 #10

0

ファイルを表示

ファイル: data_gen_np.py プロジェクト: Genomics-HSE/DeepModels2

def generate_demographic_events_complex(
        population: int = None,
        max_time: float = 420_000) -> 'msprime.Demography':
    """
    Create a single-population demography with random size changes over time.

    Waiting times between successive size changes are drawn from an
    exponential distribution; events are added until the accumulated time
    reaches ``max_time``. At each event the size is multiplied by a random
    coefficient or by its reciprocal (chosen at random) and clamped to
    ``[MIN_POPULATION_NUM, MAX_POPULATION_NUM]``.

    :param population: Initial size of population "A". When falsy, a size is
        obtained from ``give_population_size()``.
    :param max_time: Events stop being added once the accumulated time is
        no longer below this value. Defaults to 420_000, the horizon that
        was previously hard-coded.
    :return: The demography with all size-change events added.
    """
    if not population:
        population = give_population_size()

    demography = msprime.Demography()
    demography.add_population(name="A", initial_size=population)

    current_size = population
    elapsed = 0

    while elapsed < max_time:
        # NOTE(review): ``lambda_exp`` is lower-case here, whereas
        # generate_demographic_events uses the constant ``LAMBDA_EXP`` —
        # confirm that a module-level ``lambda_exp`` really exists.
        elapsed += np.random.exponential(lambda_exp)

        # Draw the coefficient first and the exponent second so the random
        # stream is consumed in the same order as before.
        base = np.random.uniform(*POPULATION_COEFF_LIMIT_COMPLEX)
        exponent = np.random.choice([-1, 1])
        coeff = base**exponent

        current_size = min(
            max(current_size * coeff, MIN_POPULATION_NUM),
            MAX_POPULATION_NUM)

        demography.add_population_parameters_change(
            elapsed, initial_size=current_size)

    return demography

コード例 #11

0

ファイルを表示

            md["selection_coeff"] = mut_map[sid]
        tables.mutations.add_row(site=m.site,
                                 node=m.node,
                                 time=m.time,
                                 derived_state=m.derived_state,
                                 parent=m.parent,
                                 metadata={"mutation_list": md_list})
    assert tables.mutations.num_rows == mts.num_mutations
    print(
        f"The selection coefficients range from {min(mut_map.values()):0.2e}")
    print(f"to {max(mut_map.values()):0.2e}.")
    return tables.tree_sequence()


# Snakes: simulate ancestry for a single population "p0" and annotate the
# result with pyslim defaults.
snake_demog = msprime.Demography()
snake_demog.add_population(name="p0", initial_size=10000)
snakes = msprime.sim_ancestry(
    samples={"p0": 300},  # number of individuals sampled from "p0"
    demography=snake_demog,
    recombination_rate=1e-8,
    sequence_length=sequence_length)

# Add default SLiM annotations for a nonWF model at generation 1.
snakes = pyslim.annotate_defaults(
    snakes,
    model_type='nonWF',
    slim_generation=1,
)

# add mutations
snakes = add_mutations(snakes,

コード例 #12

0

ファイルを表示

ファイル: sim_msprime.py プロジェクト: stsmall/abc_scripts2

def run_simulation(param_df):
    """Run msprime simulation.

    Besides ``param_df`` the function reads several module-level names:
    ``model_dt``, ``stats_dt``, ``ploidy``, ``hybrid_switch_over``,
    ``dry_run``, ``vcf`` and ``header_len``, and it calls the helpers
    ``demo_config``, ``checkDemo``, ``read_trees`` and ``PopSumStats``.

    Parameters
    ----------
    param_df : mapping
        Simulation parameters. The keys ``"rec_t"`` (recombination rate) and
        ``"mu_t"`` (mutation rate) are read here; the whole object is also
        passed on to ``demo_config``.

    Returns
    -------
    None
        When ``dry_run`` is truthy (only ``checkDemo`` is run).
    tree sequence
        When ``vcf`` is truthy: one simulated tree sequence with mutations.
    numpy.ndarray
        Otherwise: the ``np.nanmean`` over loci (axis 0) of the summary
        statistics computed for each replicate.

    """
    demo_events = msp.Demography()

    # set samples sizes, here in diploids. so nsam/2
    # NOTE(review): "/" yields floats; confirm that msprime accepts
    # non-integer sample counts, otherwise this should be "// 2".
    sample_sizes = model_dt["sampleSize"]
    samples = {
        f'pop_{i}': sample_size / 2
        for i, sample_size in enumerate(sample_sizes)
    }

    # set population sizes
    init_sizes = [size * ploidy for size in model_dt["initialSize"]]
    for i, init in enumerate(init_sizes):
        demo_events.add_population(name=f"pop_{i}", initial_size=init)

    # set migration rates from migration matrix if > 0
    mig_mat = model_dt["migmat"]
    if np.sum(mig_mat) > 0:
        # All off-diagonal entries of the migration matrix.
        off_diagonal = ~np.eye(mig_mat.shape[0], dtype=bool)
        sym_rates = [mig_mat[i, j] for i, j in zip(*np.where(off_diagonal))]
        if sym_rates.count(sym_rates[0]) == len(sym_rates):
            # Every off-diagonal rate is identical.
            # NOTE(review): confirm that Demography.set_migration_rate
            # accepts source=None/dest=None to mean "all pairs".
            demo_events.set_migration_rate(source=None,
                                           dest=None,
                                           rate=sym_rates[0])
        else:
            # zip(*mig_mat) walks the matrix column by column, so ``p`` is
            # the column index and ``i`` the row index of rate ``m``.
            mig_matrix = zip(*mig_mat)
            for p, pop_m in enumerate(mig_matrix):
                # enumerate() is required here: iterating ``pop_m`` directly
                # yields scalar rates, which cannot be unpacked into (i, m).
                for i, m in enumerate(pop_m):
                    if p != i and m > 0:
                        demo_events.set_migration_rate(source=p,
                                                       dest=i,
                                                       rate=m)

    # build demographic command line
    demo_events = demo_config(param_df, demo_events)

    # set hybrid models
    if hybrid_switch_over:
        model_list = [
            msp.DiscreteTimeWrightFisher(duration=hybrid_switch_over),
            msp.StandardCoalescent(),
        ]
    else:
        model_list = msp.StandardCoalescent()

    # check demo
    if dry_run:
        checkDemo(demo_events)
        return None

    elif vcf:
        tree = msp.sim_ancestry(samples,
                                recombination_rate=param_df["rec_t"],
                                demography=demo_events,
                                sequence_length=model_dt["contig_length"],
                                model=model_list)
        tree = msp.sim_mutations(tree, rate=param_df["mu_t"])
        return tree

    else:
        trees = msp.sim_ancestry(samples,
                                 recombination_rate=param_df["rec_t"],
                                 demography=demo_events,
                                 num_replicates=model_dt["loci"],
                                 sequence_length=model_dt["contig_length"],
                                 model=model_list)
        # calc stats: one row per locus, one column per statistic value
        stat_mat = np.zeros([model_dt["loci"], header_len])
        length_bp = stats_dt["length_bp"]
        pfe = stats_dt["perfixder"]
        for i, tree in enumerate(trees):
            tree = msp.sim_mutations(tree,
                                     rate=param_df["mu_t"],
                                     model="binary")
            stats_ls = []
            pos, haps, counts, bp = read_trees(tree,
                                               length_bp,
                                               pfe,
                                               seq_error=True)
            stats_dt["breakpoints"] = bp
            popsumstats = PopSumStats(pos, haps, counts, stats_dt)
            for stat in stats_dt["calc_stats"]:
                stat_fx = getattr(popsumstats, stat)
                try:
                    ss = stat_fx()
                except IndexError:
                    # A statistic that fails with IndexError is recorded as
                    # NaNs so that np.nanmean below ignores it.
                    ss = [np.nan] * len(stats_dt["pw_quants"])
                stats_ls.extend(ss)
            stat_mat[i, :] = stats_ls

        return np.nanmean(stat_mat, axis=0)

コード例 #13

0

ファイルを表示

import pyslim
import tskit
import msprime
from IPython.display import SVG
import numpy as np
import subprocess
import os
#import util
# Neutral burn-in with msprime (coalescent simulation).
# Four breakpoints delimiting the three intervals of the rate maps below.
breaks = [0, 33333334, 66666667, 100000000]  # presumably the genome length is the last value — TODO confirm
recomb_map = msprime.RateMap(
    position=breaks,
    rate=[1e-8, 1e-8, 1e-8])  # the same recombination rate in all three intervals
demog_model = msprime.Demography()
demog_model.add_population(initial_size=10000)
print("Working on Snake Sim")
ots = msprime.sim_ancestry(
    samples=1000,  # number of individuals sampled? — TODO confirm
    demography=demog_model,
    # random_seed=5,
    recombination_rate=recomb_map)

# Add default SLiM annotations (metadata) for a nonWF model at generation 1.
ots = pyslim.annotate_defaults(ots, model_type="nonWF", slim_generation=1)
# Mutation rate map over the same three intervals as the recombination map.
mut_map = msprime.RateMap(position=breaks,
                          rate=[1e-10, 1e-10,
                                1e-10])  # open question: which rate(s) belong here
mut_model = msprime.SLiMMutationModel(type=2)  # mutation "m2"
ots = msprime.sim_mutations(
    ots,
    rate=mut_map,