コード例 #1
0
def msprime_sim(sample_size, Ne, length, recombination_rate, recombination_map,
                mutation_rate, migration_matrix, demographic_events,
                num_replicates):
    """Build the sample and population lists, then run ``msprime.simulate``.

    Every parameter is forwarded unchanged to ``msprime.simulate``. The
    sample layout is NOT a parameter: it is read from the names
    ``modern_samples`` (a sequence of per-population sample counts),
    ``ancient_samples`` (a count) and ``time`` (sampling time of the ancient
    samples), which must be resolvable from the enclosing scope at call
    time.

    Both the sample list and the population configuration list are printed
    before the simulation starts.

    NOTE(review): ``sample_size`` and ``samples`` are both forwarded; confirm
    the installed msprime accepts that combination.

    Returns whatever ``msprime.simulate`` returns.
    """
    # Present-day samples: modern_samples[i] copies tagged with population i.
    samples = [
        msprime.Sample(pop_index, 0)
        for pop_index, count in enumerate(modern_samples)
        for _ in range(count)
    ]
    # Ancient samples are all placed in population 0 at the outer `time`.
    samples.extend(msprime.Sample(0, time) for _ in range(ancient_samples))
    print(samples)

    # One default configuration per entry of modern_samples.
    population_configurations = [
        msprime.PopulationConfiguration()
        for _ in range(len(modern_samples))
    ]
    print(population_configurations)

    return msprime.simulate(
        sample_size=sample_size,
        Ne=Ne,
        length=length,
        recombination_rate=recombination_rate,
        recombination_map=recombination_map,
        mutation_rate=mutation_rate,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
        samples=samples,
        num_replicates=num_replicates)
コード例 #2
0
ファイル: dadi_utils.py プロジェクト: tjstruck/analysis
def msprime_to_dadi_simulation_OutOfAfrica(path, seed, chrom, sample_size=20):
    """
    Simulate two populations with msprime and save their joint SFS for dadi.

    ``chrom`` is a key into ``homo_sapiens.genome.chromosomes``; the selected
    chromosome supplies the recombination map and the mutation rate.
    ``sample_size`` haplotypes are sampled at time 0 from population 0 and
    the same number from population 1 of the
    ``GutenkunstThreePopOutOfAfrica`` model, using ``seed`` as the random
    seed. The joint spectrum is wrapped in ``dadi.Spectrum`` and written to
    ``path``. Nothing is returned.
    """
    chromosome = homo_sapiens.genome.chromosomes[chrom]
    model = homo_sapiens.GutenkunstThreePopOutOfAfrica()

    # Population 0 haplotypes come first, population 1 haplotypes second;
    # the column slicing below relies on this ordering.
    samples = []
    for population in (0, 1):
        samples.extend(
            [msprime.Sample(population=population, time=0)] * sample_size)

    tree_seq = msprime.simulate(
        samples=samples,
        recombination_map=chromosome.recombination_map(),
        mutation_rate=chromosome.default_mutation_rate,
        random_seed=seed,
        **model.asdict())
    haplotypes = np.array(tree_seq.genotype_matrix())

    def alt_allele_counts(pop_haplotypes):
        # Pair the haplotypes into diploid genotypes and keep the count
        # column for allele 1.
        genotypes = allel.HaplotypeArray(pop_haplotypes).to_genotypes(
            ploidy=2)
        return genotypes.count_alleles()[:, 1]

    # Split the haplotype columns into the two populations by sample_size.
    counts_pop0 = alt_allele_counts(haplotypes[:, :sample_size])
    counts_pop1 = alt_allele_counts(haplotypes[:, sample_size:])

    sfs_joint = dadi.Spectrum(allel.joint_sfs(counts_pop0, counts_pop1))
    sfs_joint.to_file(path)
コード例 #3
0
 def get_example_base(self, num_populations=1, length=1):
     """
     Simulate a small tree sequence and return the result of passing it,
     together with half its edge count, to ``tsutil.decapitate``.

     The simulation uses ten samples from population 0 at time 0, a fixed
     random seed of 155, ``num_populations`` default population
     configurations and a migration matrix that is 1 everywhere except
     for a zero diagonal.
     """
     pop_count = num_populations
     rates = np.ones((pop_count, pop_count))
     np.fill_diagonal(rates, 0)
     configs = [msprime.PopulationConfiguration() for _ in range(pop_count)]
     base = msprime.simulate(
         samples=[msprime.Sample(0, 0) for _ in range(10)],
         length=length,
         random_seed=155,
         population_configurations=configs,
         migration_matrix=rates,
     )
     cut_point = base.num_edges // 2
     return tsutil.decapitate(base, cut_point)
コード例 #4
0
 def test_sample_size_population_configuration(self):
     """
     Sample sizes given through population configurations must yield the
     matching sample list, for one to four populations.
     """
     for num_pops in range(1, 5):
         # Zero sample size is always an error
         empty_configs = [
             msprime.PopulationConfiguration(0) for _ in range(num_pops)
         ]
         with self.assertRaises(ValueError):
             msprime.simulator_factory(
                 population_configurations=empty_configs)

         # Two samples per population: 2 * num_pops samples overall.
         pair_configs = [
             msprime.PopulationConfiguration(2) for _ in range(num_pops)
         ]
         sim = msprime.simulator_factory(
             population_configurations=pair_configs)
         self.assertEqual(len(sim.samples), 2 * num_pops)

         expected = [
             msprime.Sample(population=pop, time=0)
             for pop in range(num_pops)
             for _ in range(2)
         ]
         self.assertEqual(sim.samples, expected)
         # The low-level instance must report the same samples.
         ll_sim = sim.create_ll_instance()
         self.assertEqual(ll_sim.get_samples(), expected)
コード例 #5
0
 def test_samples(self):
     """
     Each way of requesting samples must raise ValueError when combined
     with ``from_ts``.
     """
     base_ts = self.get_example_base()

     # Positional sample size.
     with self.assertRaises(ValueError):
         msprime.simulate(2, from_ts=base_ts)

     # Keyword sample size.
     with self.assertRaises(ValueError):
         msprime.simulate(sample_size=2, from_ts=base_ts)

     # Explicit sample list.
     with self.assertRaises(ValueError):
         msprime.simulate(
             samples=[msprime.Sample(0, 0) for _ in range(10)],
             from_ts=base_ts,
         )

     # Sample size carried by a population configuration.
     with self.assertRaises(ValueError):
         msprime.simulate(
             population_configurations=[
                 msprime.PopulationConfiguration(sample_size=2)
             ],
             from_ts=base_ts,
         )
コード例 #6
0
def set_up_pops(nS, tS):
    """Return the msprime sample list for the eight populations.

    Populations are appended in a fixed order, each contributing
    ``1 * nS[0]`` samples; only the first entry of ``nS`` is read.
    Sampling times come from ``tS``: ``tS[1]`` for DEN3, ``tS[2]`` for NEA
    and ``tS[0]`` for every other population. The population identifiers
    (``DEN3``, ``AFR``, ...) are names resolved from the enclosing scope.
    """
    copies = 1 * nS[0]
    layout = (
        (DEN3, tS[1]),  # Denisovan 3 (Altai)
        (AFR, tS[0]),   # Africa
        (CEU, tS[0]),   # European
        (EAS, tS[0]),   # East Asian
        (PAP, tS[0]),   # Papuan
        (AYT, tS[0]),   # Negrito (Ayta)
        (NEA, tS[2]),   # Neanderthal
        (CHM, tS[0]),   # Chimp
    )
    samples = []
    for population, sampling_time in layout:
        samples.extend(
            [msp.Sample(population=population, time=sampling_time)] * copies)
    return samples
コード例 #7
0
ファイル: test_simulate_from.py プロジェクト: leovam/msprime
 def test_samples(self):
     """
     Each way of requesting samples must raise ValueError when combined
     with ``from_ts``.
     """
     base_ts = self.get_example_base()
     # (positional args, keyword args) for each call expected to fail.
     rejected_calls = [
         ((2,), {}),
         ((), {"sample_size": 2}),
         ((), {"samples": [msprime.Sample(0, 0) for _ in range(10)]}),
         (
             (),
             {
                 "population_configurations": [
                     msprime.PopulationConfiguration(sample_size=2)
                 ]
             },
         ),
     ]
     for args, kwargs in rejected_calls:
         with pytest.raises(ValueError):
             msprime.simulate(*args, from_ts=base_ts, **kwargs)
コード例 #8
0
ファイル: simulations.py プロジェクト: luntergroup/analysis
def simulate(out_path,
             species,
             model,
             genetic_map,
             seed,
             chrmStr,
             sample_size=20,
             population=0):
    """Simulate one chromosome under ``model`` and dump the tree sequence.

    ``chrmStr`` selects the chromosome from ``species.genome.chromosomes``;
    its recombination map is requested by ``genetic_map.name`` and its
    ``default_mutation_rate`` is used as the mutation rate. ``sample_size``
    samples are taken from ``population`` at time 0, and ``seed`` is the
    random seed. The result is written to ``out_path``; nothing is
    returned. Progress messages are printed before and after the run.
    """
    chromosome = species.genome.chromosomes[chrmStr]
    sample_list = [msp.Sample(population=population, time=0)] * sample_size

    print("Simulating...")
    recomb_map = chromosome.recombination_map(genetic_map.name)
    tree_seq = msp.simulate(
        samples=sample_list,
        recombination_map=recomb_map,
        mutation_rate=chromosome.default_mutation_rate,
        random_seed=seed,
        **model.asdict())
    tree_seq.dump(out_path)
    print("Simulation finished!")
コード例 #9
0
ファイル: test_models.py プロジェクト: schimar/msprime
 def test_wf_hudson_ancient_samples(self):
     """
     Simulate under DiscreteTimeWrightFisher, switching to
     StandardCoalescent at ``switch_time``, with samples taken at times
     0..19, and check the node times on each side of the switch.
     """
     Ne = 10
     switch_time = 10
     num_samples = 20
     model_change = msprime.SimulationModelChange(
         switch_time, msprime.StandardCoalescent(Ne))
     ts = msprime.simulate(
         samples=[
             msprime.Sample(time=j, population=0)
             for j in range(num_samples)
         ],
         model=msprime.DiscreteTimeWrightFisher(Ne),
         demographic_events=[model_change],
         random_seed=2)
     self.assertEqual(ts.first().num_roots, 1)

     # Only nodes whose flags value is 0 are inspected.
     nodes = ts.tables.nodes
     times = nodes.time[nodes.flags == 0]

     # Strictly between 0 and the switch: times must be whole numbers.
     before_switch = times[(times > 0) & (times < switch_time)]
     self.assertGreater(before_switch.shape[0], 0)
     self.assertTrue(np.all(before_switch == np.floor(before_switch)))

     # After the switch: times must not be whole numbers.
     after_switch = times[times > switch_time]
     self.assertGreater(after_switch.shape[0], 0)
     self.assertTrue(np.all(after_switch != np.floor(after_switch)))
コード例 #10
0
ファイル: test_models.py プロジェクト: winni2k/msprime
 def test_wf_hudson_ancient_samples(self):
     """
     Simulate with the model list ``["dtwf", (t, "hudson")]`` and samples
     taken at times 0..19, then check the node times on each side of the
     switch time.
     """
     Ne = 10
     switch_time = 10
     num_samples = 20
     staggered_samples = [
         msprime.Sample(time=j, population=0) for j in range(num_samples)
     ]
     ts = msprime.simulate(
         samples=staggered_samples,
         Ne=Ne,
         model=["dtwf", (switch_time, "hudson")],
         random_seed=2,
     )
     self.assertEqual(ts.first().num_roots, 1)

     # Only nodes whose flags value is 0 are inspected.
     nodes = ts.tables.nodes
     times = nodes.time[nodes.flags == 0]

     # Strictly between 0 and the switch: times must be whole numbers.
     before_switch = times[(times > 0) & (times < switch_time)]
     self.assertGreater(before_switch.shape[0], 0)
     self.assertTrue(np.all(before_switch == np.floor(before_switch)))

     # After the switch: times must not be whole numbers.
     after_switch = times[times > switch_time]
     self.assertGreater(after_switch.shape[0], 0)
     self.assertTrue(np.all(after_switch != np.floor(after_switch)))
コード例 #11
0
def runSimulator(simNum, chrNum, seedNum, Ne4, T, Ne3, Ne1, recRate):
    """Simulate one 1e7-long sequence and write it to a VCF file.

    ``Ne1`` is the initial population size; the size then changes to 10 at
    time 40, ``Ne3`` at time 42, ``Ne4`` at time ``T`` and 20000 at time
    8012. ``recRate`` is the recombination rate and ``seedNum`` the random
    seed. The output goes to ``peak_sim<seedNum>.vcf`` with ploidy 2 and
    ``str(chrNum)`` as the contig id. ``simNum`` is accepted but not used.
    Nothing is returned.
    """
    # San Nicolas demographic model, moving backwards in time from the
    # year 2000: (time, new population size) pairs.
    size_changes = ((40, 10), (42, Ne3), (T, Ne4), (8012, 20000))
    demographic_events = [
        msprime.PopulationParametersChange(time=when,
                                           initial_size=size,
                                           population_id=0)
        for when, size in size_changes
    ]

    # (sampling time, haplotype count): one individual (two haplotypes) in
    # 1929, two individuals in 1988, and one individual in 2000.
    haplotypes_at = ((71, 2), (12, 4), (0, 2))
    samples = [
        msprime.Sample(population=0, time=when)
        for when, count in haplotypes_at
        for _ in range(count)
    ]

    tree_sequence = msprime.simulate(demographic_events=demographic_events,
                                     samples=samples,
                                     Ne=Ne1,
                                     length=1e7,
                                     recombination_rate=recRate,
                                     mutation_rate=2e-8,
                                     random_seed=seedNum)

    # Output VCF file
    vcf_name = 'peak_sim' + str(seedNum) + '.vcf'
    with open(vcf_name, 'w') as vcf_file:
        tree_sequence.write_vcf(vcf_file, 2, str(chrNum))
コード例 #12
0
    def get_samples(self, *args):
        """
        Return a list of msprime.Sample objects described by the positional
        arguments.

        The i-th argument is the number of samples to take from
        ``self.populations[i]``; each sample uses that population's
        ``sampling_time``. Populations without a corresponding argument
        contribute no samples.

        Raises ValueError if a positive number of samples is requested from
        a population whose ``allow_samples`` is false.
        """
        samples = []
        for pop_index, n in enumerate(args):
            population = self.populations[pop_index]
            if not population.allow_samples:
                # Requesting zero samples from such a population is fine.
                if n > 0:
                    raise ValueError(
                        "Samples requested from non-sampling population"
                        f" {pop_index}")
                continue
            template = msprime.Sample(pop_index, time=population.sampling_time)
            samples.extend([template] * n)
        return samples
コード例 #13
0
def get_samples(dg, pop_ids, sample_sizes):
    """
    Get the samples list for the given population names and sample sizes.

    Samples can only be taken from populations that are leaves of ``dg``
    (checked with an assertion), and sampling is assumed to occur at the
    end of that node in the graph.

    To get the time of the end of each leaf, we take the accumulated times
    reported by ``util.get_accumulated_times``, find their maximum, and
    subtract each leaf's accumulated time from that maximum.

    The population indexes used in the samples are the ones returned by
    ``get_population_configurations``.
    """
    # Only the name -> index mapping is needed; the configs are discarded.
    _, pop_indexes = get_population_configurations(dg, [], {}, 1)
    accumulated = util.get_accumulated_times(dg)
    latest = max(accumulated.values())

    samples = []
    for pop, ns in zip(pop_ids, sample_sizes):
        assert pop in dg.leaves, "samples can only be taken from leaves"
        sampling_time = latest - accumulated[pop]
        template = msprime.Sample(pop_indexes[pop], time=sampling_time)
        samples.extend([template] * ns)
    return samples
コード例 #14
0
 def test_sample_combination_errors(self):
     """
     Make sure that the various ways we can specify the samples operate
     correctly: every invalid combination must raise ValueError.
     """
     s = msprime.Sample(time=0.0, population=0)

     # No sample specification at all.
     with self.assertRaises(ValueError):
         msprime.simulator_factory()

     # Cannot provide sample_size with either population configurations
     # or samples
     with self.assertRaises(ValueError):
         msprime.simulator_factory(sample_size=2, samples=[s, s])
     with self.assertRaises(ValueError):
         msprime.simulator_factory(
             sample_size=2,
             population_configurations=[
                 msprime.PopulationConfiguration(sample_size=2)
             ],
         )

     # If we provide samples and population_configurations we cannot
     # have a sample size for the config.
     with self.assertRaises(ValueError):
         msprime.simulator_factory(
             samples=[s, s],
             population_configurations=[
                 msprime.PopulationConfiguration(sample_size=2)
             ],
         )
     with self.assertRaises(ValueError):
         msprime.simulator_factory(
             samples=[s, s],
             population_configurations=[
                 msprime.PopulationConfiguration(sample_size=None),
                 msprime.PopulationConfiguration(sample_size=2),
             ],
         )
コード例 #15
0
ファイル: models.py プロジェクト: ragreenburg/stdpopsim
 def get_samples(self, *args):
     """
     Returns a list of msprime.Sample objects, with the number of samples
     from each population determined by the positional arguments.
     For instance, ``model.get_samples(2, 5, 7)`` would return a list of 14
     samples: two from ``model.populations[0]``, five from
     ``model.populations[1]`` and seven from ``model.populations[2]``.
     Each sample uses the ``sampling_time`` of its population, and
     populations without a corresponding argument contribute no samples.

     Raises ValueError if a positive number of samples is requested from a
     population whose ``allow_samples`` is false.
     """
     samples = []
     for pop_index, n in enumerate(args):
         population = self.populations[pop_index]
         if not population.allow_samples:
             # Requesting zero samples from such a population is fine.
             if n > 0:
                 raise ValueError(
                     "Samples requested from non-sampling population"
                     f" {pop_index}")
             continue
         template = msprime.Sample(pop_index, time=population.sampling_time)
         samples.extend([template] * n)
     return samples
コード例 #16
0
    def _get_nsamples(self):
        """
        Populate ``self._samples`` for the simulator.

        If tips are ultrametric, ``self._samples`` is set to None. Otherwise
        it becomes a list of ``ms.Sample(tip_index, height)`` entries, where
        the tip index follows the order of ``self.tree.get_tip_labels()``,
        the height is ``self.tip_to_heights[tip]`` truncated to an int, and
        each tip is repeated ``self.sampledict[tip]`` times.
        """
        # ultrametric tips need no explicit sample list
        if self._tips_are_ultrametric:
            self._samples = None
            return

        self._samples = []
        for tip_index, label in enumerate(self.tree.get_tip_labels()):
            tip_height = int(self.tip_to_heights[label])
            copies = self.sampledict[label]
            self._samples.extend(
                ms.Sample(tip_index, tip_height) for _ in range(copies))
コード例 #17
0
def neanderthal_admixture_model(num_modern=1000,
                                anc_pop=1,
                                anc_num=1,
                                anc_time=900,
                                mix_time=2000,
                                split_time=120000,
                                f=0.03,
                                Ne0=10000,
                                Ne1=2500,
                                mu=1.5e-8,
                                rho=1.0e-8,
                                length=10000000,
                                window_size=1000000,
                                num_SNP=1,
                                num_rep=100,
                                coverage=False):
    """Simulate a two-population admixture model and summarise it per window.

    ``num_modern`` samples are taken from population 0 at time 0 and
    ``anc_num`` samples from population ``anc_pop`` at time ``anc_time``.
    Going backwards in time, a mass migration moves a proportion ``f`` of
    population 0 into population 1 at ``mix_time``, and all of population 1
    moves into population 0 at ``split_time``. ``Ne0`` and ``Ne1`` are the
    initial sizes of populations 0 and 1; ``mu``, ``rho`` and ``length`` are
    passed to the simulator as mutation rate, recombination rate and
    sequence length. ``num_rep`` replicates are simulated.

    For every replicate, the sequence is walked in windows of
    ``window_size``; for the tree covering a window's midpoint the function
    climbs from the last sample towards the root while the parent is
    younger than ``split_time`` and records the leaf count under the node
    reached (minus one) together with the tree's length.

    The records are written to ``outfile_s.txt`` (tab-separated, with a
    header line) and returned as three numpy arrays
    ``(window, frequency, length)``.

    Returns None, after printing a message, if ``f`` is outside [0, 1].
    ``num_SNP`` and ``coverage`` are accepted but not used.
    """
    # when is best time to sample Neanderthal? 100 gen before f?
    # error catching, leave there for now
    if f < 0 or f > 1:
        # Call syntax (not the Python 2 statement form) so the module also
        # parses under Python 3; the printed text is unchanged.
        print("Admixture fraction is not in [0,1]")
        return None
    samples = [msp.Sample(population=0, time=0)
               ] * num_modern  # sample 1 Neanderthal for comparison
    samples.extend([msp.Sample(population=anc_pop, time=anc_time)] * (anc_num))
    pop_config = [
        msp.PopulationConfiguration(initial_size=Ne0),
        msp.PopulationConfiguration(initial_size=Ne1)
    ]
    divergence = [
        msp.MassMigration(time=mix_time, source=0, destination=1,
                          proportion=f),
        msp.MassMigration(time=split_time,
                          source=1,
                          destination=0,
                          proportion=1.0)
    ]
    sims = msp.simulate(samples=samples,
                        Ne=Ne0,
                        population_configurations=pop_config,
                        demographic_events=divergence,
                        mutation_rate=mu,
                        recombination_rate=rho,
                        length=length,
                        num_replicates=num_rep)
    win = []
    freq = []
    leng = []
    # FYI mean fragment length from test_2 model ~6000 bp
    for sim in sims:
        cur_win = 1
        cur_start = 0
        cur_end = window_size - 1
        cur_site = (cur_start + cur_end) / 2.0
        # NOTE(review): each tree is tested against a single midpoint, so a
        # tree spanning several window midpoints records only the first of
        # them -- confirm this is intended.
        for tree in sim.trees():
            F_int = tree.get_interval()
            if cur_site >= F_int[0] and cur_site < F_int[1]:
                # the very last leaf; when adding more modern pops make sure
                # Neanderthal is still last
                cur_node = len(samples) - 1
                while tree.get_time(tree.get_parent(cur_node)) < split_time:
                    cur_node = tree.get_parent(cur_node)
                F_length = tree.get_length()
                # minus our lone Neanderthal
                N_freq = tree.get_num_leaves(cur_node) - 1
                win.append(cur_win)
                freq.append(N_freq)
                leng.append(F_length)
                cur_start += window_size
                cur_end += window_size
                if cur_end > length:
                    break
                cur_win += 1
                cur_site = (cur_start + cur_end) / 2.0
    # The context manager closes the file even if a write fails.
    with open('outfile_s.txt', 'w') as outfile:
        outfile.write("window\tfrequency\tlength")
        outfile.write('\n')
        for row in zip(win, freq, leng):
            outfile.write('\t'.join(str(value) for value in row))
            outfile.write('\n')
    return np.array(win), np.array(freq), np.array(leng)
コード例 #18
0
"""
Example of using the stdpopsim library with msprime.
"""
import msprime
import stdpopsim.h_sapiens as h_sap

# Model object taken from the stdpopsim h_sapiens catalogue.
model = h_sap.models.GutenkunstThreePopOutOfAfrica()

model.debug()

# One sample each from populations 0, 1 and 2 (YRI, CEU and CHB). There's no
# point in pushing the sampling strategy into the model generation.
samples = [msprime.Sample(population=pop, time=0) for pop in range(3)]

# Chromosome 22 provides the length and the mean recombination and mutation
# rates; the remaining simulate() arguments come from the model itself.
ts = msprime.simulate(
    samples=samples,
    length=h_sap.chr22.length,
    recombination_rate=h_sap.chr22.mean_recombination_rate,
    mutation_rate=h_sap.chr22.mean_mutation_rate,
    **model.asdict())

# print(ts.tables)
print("simulated:", ts.num_trees, ts.num_sites)
コード例 #19
0
    def run_model(self):
        """
        Simulate the five-population demography defined below and return
        the resulting tree sequence.

        Reads ``self.infile`` (a HapMap-format recombination map),
        ``self.hg_mig_rate``, ``self.mutation_rate`` and the parallel
        sequences ``self.populations``, ``self.sample_times`` and
        ``self.nhaps`` (population id, sampling time and number of
        haplotypes for each sample group). The demographic history is
        printed through ``msprime.DemographyDebugger`` before the
        simulation is run.
        """
        # Load recomb map
        recomb_map = msprime.RecombinationMap.read_hapmap(self.infile)

        # Initial population sizes
        n_bronze, n_yam, n_baa = 50000, 20000, 10000
        n_whg, n_ehg = 10000, 10000
        n_neo, n_chg = 50000, 10000
        n_anc_hg = 5000  # Ancestor of WHG and EHG
        n_anc_ne = 5000  # Ancestor of CHG and Neolithic farmers

        # Times of events
        t_bronze, t_yam, t_neo, t_baa = 150, 200, 250, 275
        t_ana = 276
        t_europe, t_near_east, t_basal = 500, 800, 1500

        # Growth rate and initial population size for present day from
        # bronze age
        growth_eu = 0.067
        n_present = n_bronze / math.exp(-growth_eu * t_bronze)

        # Populations: 0=present/bronze/neolithic_farmers/Ana/B, 1=Yam/CHG,
        # 2=WHG/A, 3=EHG, 4=BAA
        population_configurations = [
            msprime.PopulationConfiguration(initial_size=n_present,
                                            growth_rate=growth_eu)
        ]
        population_configurations.extend(
            msprime.PopulationConfiguration(initial_size=size)
            for size in (n_yam, n_whg, n_ehg, n_baa))

        demographic_events = []

        # Bronze age formation
        demographic_events.extend([
            msprime.MassMigration(time=t_bronze, source=0, dest=1,
                                  proportion=0.5),
            msprime.PopulationParametersChange(time=t_bronze,
                                               initial_size=n_neo,
                                               growth_rate=0,
                                               population=0),
        ])

        # Yamnaya formation; hunter-gatherer migration set in both
        # directions between populations 2 and 3
        demographic_events.extend([
            msprime.MassMigration(time=t_yam, source=1, dest=3,
                                  proportion=0.5),
            msprime.PopulationParametersChange(time=t_yam,
                                               initial_size=n_chg,
                                               population=1),
        ])
        demographic_events.extend(
            msprime.MigrationRateChange(time=t_yam,
                                        rate=self.hg_mig_rate,
                                        matrix_index=pair)
            for pair in ((2, 3), (3, 2)))

        # European neolithic
        demographic_events.append(
            msprime.MassMigration(time=t_neo, source=0, dest=2,
                                  proportion=1.0 / 4.0))

        # BAA formation
        demographic_events.append(
            msprime.MassMigration(time=t_baa, source=4, dest=1,
                                  proportion=1.0 / 4.0))

        # Ana split
        demographic_events.append(
            msprime.MassMigration(time=t_ana, source=4, dest=0,
                                  proportion=1))

        # Hunter-gatherer split
        demographic_events.extend([
            msprime.MassMigration(time=t_europe, source=3, dest=2,
                                  proportion=1),
            msprime.MigrationRateChange(time=t_europe, rate=0),
            msprime.PopulationParametersChange(time=t_europe,
                                               initial_size=n_anc_hg,
                                               population=2),
        ])

        # Near-east split
        demographic_events.extend([
            msprime.MassMigration(time=t_near_east, source=1, dest=0,
                                  proportion=1),
            msprime.PopulationParametersChange(time=t_near_east,
                                               initial_size=n_anc_ne,
                                               population=0),
        ])

        # Basal split
        demographic_events.append(
            msprime.MassMigration(time=t_basal, source=2, dest=0,
                                  proportion=1))

        # Define samples: nhaps[i] copies for each configured population
        samples = []
        for i, p in enumerate(self.populations):
            template = msprime.Sample(time=self.sample_times[i], population=p)
            samples.extend([template] * self.nhaps[i])

        # Debugging the demography (all-zero 5x5 migration matrix)
        migration_matrix = [[0] * 5 for _ in range(5)]
        dd = msprime.DemographyDebugger(
            population_configurations=population_configurations,
            migration_matrix=migration_matrix,
            demographic_events=demographic_events)
        dd.print_history()

        # Run the simulation over the loaded recombination map
        return msprime.simulate(
            recombination_map=recomb_map,
            mutation_rate=self.mutation_rate,
            population_configurations=population_configurations,
            demographic_events=demographic_events,
            samples=samples)
コード例 #20
0
def twopop_pulse_migration_slim2(out_dir, seed):
    """
    Run the two-population ``_twopop_IM`` scenario with the "slim" engine,
    using ``_pulse_m21`` as the pulse, a SLiM burn-in of 0 and a SLiM
    scaling factor of 10.

    Returns whatever ``_twopop_IM`` returns.
    """
    slim_options = dict(
        pulse=_pulse_m21,
        slim_burn_in=0,
        slim_scaling_factor=10,
    )
    return _twopop_IM("slim", out_dir, seed, **slim_options)


# 100 samples in total: 50 repetitions of the pair (population 0 at time 0,
# population 1 at time 500).
_ancient_samples = [
    msprime.Sample(0, time=0),
    msprime.Sample(1, time=500),
] * 50


def twopop_ancient_samples_msprime1(out_dir, seed):
    """
    Run the two-population ``_twopop_IM`` scenario with the "msprime"
    engine, using the module-level ``_ancient_samples`` list as the
    samples.

    Returns whatever ``_twopop_IM`` returns.
    """
    engine = "msprime"
    return _twopop_IM(engine, out_dir, seed, samples=_ancient_samples)


def twopop_ancient_samples_slim1(out_dir, seed):
    """
    Two populations, with ancient sampling of the second population.
    """
コード例 #21
0
ファイル: coal_cov.py プロジェクト: aabiddanda/aDNA_LD_public
    def __init__(self,
                 ta,
                 n0=1,
                 na=1,
                 Ne=1e4,
                 rec_rate=1e-4,
                 loci=2,
                 reps=100):
        """Initialize the model.

        Builds a two-population demography (population 0 and population 1,
        with growth, symmetric migration and a merge of population 1 into
        population 0 at ``T_OOA``) and stores it on the instance together
        with the sampling scheme: ``n0`` samples from population 1 at time
        0 followed by ``na`` samples from population 1 at time ``ta``.
        ``Ne``, ``rec_rate``, ``loci`` and ``reps`` are stored unchanged;
        ``self.treeseq`` starts as None.
        """
        # Event times are the constants below divided by the generation
        # time (presumably years -> generations; confirm).
        gen_time = 25
        t_af = 148e3 / gen_time
        t_ooa = 51e3 / gen_time
        t_eu0 = 23e3 / gen_time
        t_eg = 5115 / gen_time

        # Growth rates
        growth_eu0 = 0.00307
        growth_eu = 0.0195
        growth_af = 0.0166

        # Population sizes
        size_anc = 7310
        size_af1 = 14474
        size_b = 1861
        size_eu0 = 1032
        size_eu1 = size_eu0 / np.exp(-growth_eu0 * (t_eu0 - t_eg))

        # Migration rates
        mig_af_b = 15e-5
        mig_af_eu = 2.5e-5

        # Present-day sizes obtained from the sizes at t_eg and the growth
        # rates
        size_eu = size_eu1 / np.exp(-growth_eu * t_eg)
        size_af = size_af1 / np.exp(-growth_af * t_eg)

        self.pop_config = [
            msp.PopulationConfiguration(initial_size=size_af,
                                        growth_rate=growth_af),
            msp.PopulationConfiguration(initial_size=size_eu,
                                        growth_rate=growth_eu),
        ]
        self.migration_matrix = [[0, mig_af_eu], [mig_af_eu, 0]]

        both_directions = ((0, 1), (1, 0))
        events = []
        # At t_eg: migration rates set in both directions, then growth and
        # size changes for population 1 and population 0.
        events.extend(
            msp.MigrationRateChange(time=t_eg, rate=mig_af_eu,
                                    matrix_index=pair)
            for pair in both_directions)
        events.append(
            msp.PopulationParametersChange(time=t_eg,
                                           growth_rate=growth_eu0,
                                           initial_size=size_eu1,
                                           population_id=1))
        events.append(
            msp.PopulationParametersChange(time=t_eg,
                                           growth_rate=0,
                                           initial_size=size_af1,
                                           population_id=0))
        # At t_eu0: new migration rate in both directions and a constant
        # size for population 1.
        events.extend(
            msp.MigrationRateChange(time=t_eu0, rate=mig_af_b,
                                    matrix_index=pair)
            for pair in both_directions)
        events.append(
            msp.PopulationParametersChange(time=t_eu0,
                                           initial_size=size_b,
                                           growth_rate=0,
                                           population_id=1))
        # At t_ooa: population 1 merges into population 0.
        events.append(
            msp.MassMigration(time=t_ooa,
                              source=1,
                              destination=0,
                              proportion=1.0))
        # At t_af: size change for population 0.
        events.append(
            msp.PopulationParametersChange(time=t_af,
                                           initial_size=size_anc,
                                           population_id=0))
        self.demography = events

        self.rec_rate = rec_rate
        self.loci = loci
        self.reps = reps
        self.Ne = Ne
        self.treeseq = None

        # Present-day samples first, then the samples taken at time ta.
        self.samples1 = [msp.Sample(population=1, time=0) for _ in range(n0)]
        self.samples2 = [msp.Sample(population=1, time=ta) for _ in range(na)]
        self.samples = self.samples1 + self.samples2
コード例 #22
0
import msprime
import numpy as np
import matplotlib.pyplot as plt

# number of generations
t = 3
# number of diploid individuals sampled, and the resulting haplotype count
S_dip = 50
S_hap = 2 * S_dip
Ne = 200
mutation_rate = 1e-9
length = 2e8
recom_rate = 0
reps = 300

# S_hap samples from population 0 at time 0, followed by S_hap samples from
# population 0 at time 3.
samples = [
    msprime.Sample(population=0, time=sampling_time)
    for sampling_time in (0, 3)
    for i in range(S_hap)
]


def fc_variant(genotype, S_hap):
    # allele 1 :
    xi_1 = sum(v.genotypes[:S_hap]) / S_hap
    yi_1 = sum(v.genotypes[S_hap:]) / S_hap

    xi_2 = 1 - xi_1
    yi_2 = 1 - yi_1

    if xi_1 > 40 / 400 and xi_1 < 360 / 400:
        if xi_2 > -40 / 400:
            #print(xi_1,yi_1,xi_2, yi_2)
コード例 #23
0
"""
Example simulation for analysis
"""
import msprime


# import stdpopsim
from stdpopsim import homo_sapiens

# Chromosome 22 supplies the recombination map and the mutation rate.
chrom = homo_sapiens.genome.chromosomes["chr22"]
recomb_map = chrom.recombination_map()

model = homo_sapiens.GutenkunstThreePopOutOfAfrica()
# model.debug()

# Currently sampling 20 times from a single population (population 0).
tmp_samples = [msprime.Sample(population=0, time=0)]
samples = 20 * tmp_samples

# NOTE(review): the map is requested from chrom again here instead of
# reusing recomb_map -- confirm whether the second call is needed.
ts = msprime.simulate(
    samples=samples,
    recombination_map=chrom.recombination_map(),
    mutation_rate=chrom.mean_mutation_rate,
    **model.asdict())

# Hard coded output name. FIX ME
ts.dump("simulated.trees")
コード例 #24
0
    times = sorted(list(thinned_configs.keys())) + [-1]
    epochs = [(t0, t1) for t0,t1 in zip(times[:-1],times[1:])]
    epoch_configs = {}
    for e in epochs:
        epoch_configs[e] = thinned_configs[e[0]]
    return epoch_configs

# The guard must compare the module's __name__ variable, not the string
# literal "__name__"; with the literal the comparison is always False and
# the example below never runs.
if __name__ == "__main__":
    ## your simulation inputs here
    import homo_sapiens
    dg = homo_sapiens.ooa_gutenkunst()

    pop_config, mig_mat, demo_events = dg.msprime_inputs()

    ## set up the samples you want
    # in the OOA model as defined here, YRI is pop 3, CEU is 4, and CHB is 5
    # let's take 10 samples from each, all at time 0
    samples = [
        msprime.Sample(population=pop, time=0)
        for pop in (3, 4, 5)
        for _ in range(10)
    ]

    sampling_times = get_sampling_times(samples)

    epoch_configs = get_epochs(pop_config, mig_mat, demo_events,
                               sampling_times)

コード例 #25
0
ファイル: simulate.py プロジェクト: rwaples/ABCadmix_dev
def sim_ongoing_interval(rec_map=None, L=3e9, Ne=10000, Nadmix=500,
                Tadmix_start=4, Tadmix_stop=12, frac_ongoing=0.05,
                seed=None, path=None, tszip=None):

    """
    Simulate an ongoing (interval) model of admixture.

    Uses the discrete-time backwards Wright-Fisher model (model='dtwf').

    Three populations (0, 1, 2) of constant size Ne are configured.
    Population 2 is the sampled, admixed population. Going backwards in
    time, migration on matrix entry (2, 1) is switched on at Tadmix_start
    with rate frac_ongoing and switched off again at Tadmix_stop; at
    Tadmix_stop + 1 all of population 2 is moved into population 0
    (i.e. population 2 is founded from population 0). The simulation is
    stopped at end_time = Tadmix_stop + 2.

    rec_map = valid msprime recombination map
    L = length of genome, in base pairs (ignored if rec_map is specified)

    Ne = diploid population size for all three populations
    Tadmix_start = time at which migration starts (truncated to int)
    Tadmix_stop = time at which migration stops (truncated to int);
        must be strictly greater than Tadmix_start after truncation
    frac_ongoing = migration rate used during the admixture interval
    Nadmix = number of observed admixed individuals (2 * Nadmix samples
        are taken from population 2 at time 0)
    seed = seed to pass to msprime.simulate
    path = file path, if given will write the ts to this path (NOT IMPLEMENTED)
    tszip = currently unused (saving is NOT IMPLEMENTED in this function)

    Returns the tree sequence produced by msprime.simulate.
    Raises AssertionError if Tadmix_stop is not greater than Tadmix_start.
    """

    # Truncate the times to whole generations BEFORE validating their order:
    # e.g. 4.2 and 4.9 both truncate to 4, which would otherwise pass the
    # check and silently produce a zero-length admixture interval.
    Tadmix_start = int(Tadmix_start)
    Tadmix_stop = int(Tadmix_stop)

    assert Tadmix_stop > Tadmix_start, "Tadmix_stop must be greater than Tadmix_start"

    Ne = int(Ne)
    Nadmix = int(Nadmix)

    # recombination map
    if rec_map:
        recomb_map = rec_map
    else:
        L = int(L)
        # uniform map over L with rate 1e-8; the third argument is passed
        # positionally as L (presumably num_loci, one locus per base pair)
        recomb_map = msprime.RecombinationMap.uniform_map(L, 1e-8, L)

    pop_configs = [
        msprime.PopulationConfiguration(initial_size=Ne, growth_rate=0),
        msprime.PopulationConfiguration(initial_size=Ne, growth_rate=0),
        msprime.PopulationConfiguration(initial_size=Ne, growth_rate=0)
        ]

    # no migration until the MigrationRateChange events below switch it on
    mig_mat = [
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
    ]

    admixture_events = [
        # migration during the interval Tadmix_start - Tadmix_stop
        msprime.MigrationRateChange(time=Tadmix_start, rate=frac_ongoing, matrix_index=(2, 1)),
        msprime.MigrationRateChange(time=Tadmix_stop, rate=0, matrix_index=(2, 1)),
        # founding of pop 2
        msprime.MassMigration(time=Tadmix_stop + 1, source=2, destination=0, proportion=1.0),
    ]

    # two sampled genomes per admixed individual, all from population 2
    samps = [msprime.Sample(population=2, time=0)] * 2 * Nadmix

    ts_admix = msprime.simulate(
        population_configurations=pop_configs,
        migration_matrix=mig_mat,
        demographic_events=admixture_events,
        recombination_map=recomb_map,
        mutation_rate=0,
        model='dtwf',
        samples=samps,
        random_seed=seed,
        start_time=0,
        end_time=Tadmix_stop + 2
    )

    return(ts_admix)
コード例 #26
0
ファイル: simulate.py プロジェクト: rwaples/ABCadmix_dev
def sim_two_pulse(rec_map=None, L=1e9, Ne=10000, Nadmix=500,
                T1=4, T2=12, frac1=.2, frac2=.2,
                seed=None, path=None, tszip=None):
    """Simulate a two-pulse model of admixture.

    Uses the discrete-time backwards Wright-Fisher model (model='dtwf').

    Three populations (0, 1, 2) of constant size Ne are configured and
    population 2 is sampled. Mass migrations with source=2, destination=1
    are applied at T1 (proportion frac1) and at T2 (proportion frac2); at
    T2 + 1 all of population 2 is moved into population 0. The simulation
    is stopped at end_time = T2 + 2.

    rec_map = valid msprime recombination map
    L = length of genome, in base pairs (ignored if rec_map is specified)

    Ne = diploid population size for all three populations
    T1 = time of the first admixture pulse (truncated to int)
    T2 = time of the second admixture pulse (truncated to int); must be
        strictly greater than T1 after truncation
    frac1 = proportion used for the pulse at T1
    frac2 = proportion used for the pulse at T2
    Nadmix = number of observed admixed diploid individuals
    seed = seed passed to msprime.simulate()
    path = file path, if given will write the ts to this path
    tszip = if truthy (and path is given), write the ts compressed with
        the tszip package instead of a plain dump

    Returns the tree sequence produced by msprime.simulate.
    Raises AssertionError if T2 is not greater than T1.
    """

    # convert to correct dtypes and catch problems.
    # The times are truncated BEFORE the ordering check: e.g. 4.2 and 4.9
    # both truncate to 4 and would otherwise put both pulses at the same time.
    T1 = int(T1)
    T2 = int(T2)

    assert T2 > T1, "T2 must be greater than T1"

    Ne = int(Ne)
    Nadmix = int(Nadmix)

    # recombination map
    if rec_map:
        recomb_map = rec_map
    else:
        L = int(L)
        # uniform map over L with rate 1e-8; the third argument is passed
        # positionally as L (presumably num_loci, one locus per base pair)
        recomb_map = msprime.RecombinationMap.uniform_map(L, 1e-8, L)

    pop_configs = [
        msprime.PopulationConfiguration(initial_size=Ne, growth_rate=0),
        msprime.PopulationConfiguration(initial_size=Ne, growth_rate=0),
        msprime.PopulationConfiguration(initial_size=Ne, growth_rate=0)
    ]

    # no ongoing migration
    mig_mat = [
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
    ]

    admixture_events = [
        # the two admixture pulses
        msprime.MassMigration(time=T1, source=2, destination=1, proportion=frac1),
        msprime.MassMigration(time=T2, source=2, destination=1, proportion=frac2),
        # founding of pop 2
        msprime.MassMigration(time=T2 + 1, source=2, destination=0, proportion=1.0),
    ]

    # two sampled genomes per admixed individual, all from population 2
    samps = [msprime.Sample(population=2, time=0)] * 2 * Nadmix

    ts_admix = msprime.simulate(
        population_configurations=pop_configs,
        migration_matrix=mig_mat,
        demographic_events=admixture_events,
        recombination_map=recomb_map,
        mutation_rate=0,
        model='dtwf',
        samples=samps,
        random_seed=seed,
        start_time=0,
        end_time=T2 + 2
    )

    if path:
        if tszip:
            # save compressed ts; import under an alias so the `tszip`
            # flag parameter is not rebound to the module
            import tszip as tszip_module
            tszip_module.compress(ts_admix, path, variants_only=False)
        else:
            # save uncompressed ts
            ts_admix.dump(path)

    return(ts_admix)
コード例 #27
0
"""
Example of using the stdpopsim library with msprime.
"""
import msprime


import stdpopsim
from stdpopsim import drosophila_melanogaster

chrom = drosophila_melanogaster.genome.chromosomes["chrX"]
# Load the recombination map once; it is reused in the simulate() call below
# instead of calling chrom.recombination_map() a second time.
recomb_map = chrom.recombination_map()

model = drosophila_melanogaster.SheehanSongThreeEpoch()
model.debug()

# Two samples, both from population 0 at time 0.
samples = [
    msprime.Sample(population=0, time=0),
    msprime.Sample(population=0, time=0),
]

ts = msprime.simulate(
    samples=samples,
    recombination_map=recomb_map,
    mutation_rate=chrom.mean_mutation_rate,
    **model.asdict())
print("simulated:", ts.num_trees, ts.num_sites)
コード例 #28
0
infection_size = 1
stable_pop_size = 100
# Every sub-population that should exist at the end of the simulation
# (present time) must be declared here; the source population comes first.
pop_list = [PopSource]
sample_list = []
# All populations exist at the stable size with zero growth, so there is no
# death within the simulated time.
for pop in range(final_num_pops):
    # population index is shifted by one because PopSource occupies slot 0
    deme = pop + 1
    pop_list.append(
        msprime.PopulationConfiguration(initial_size=stable_pop_size,
                                        growth_rate=0))
    n_historical = sample_size // 2
    # First half of the samples are historical rather than contemporaneous,
    # taken at gens_list[pop] - 30
    # (original note: "1 week after infection" — NOTE(review): confirm units).
    sample_list.extend(
        msprime.Sample(population=deme, time=gens_list[pop] - 30)
        for _ in range(n_historical))
    # Remaining samples are taken at time 0.
    sample_list.extend(
        msprime.Sample(population=deme, time=0)
        for _ in range(n_historical, sample_size))

# There is no migration between sources across time, only infection events,
# so the migration matrix is all zeros.
M = np.zeros((final_num_pops + 1, final_num_pops + 1))

# Now get the transmission events from the data. Use the list index as the population number, offset by +1 because a fake source population occupies index 0.
#for i in list_of_root_and_kids:
#    print(list_of_root_and_kids.index(i) + 1)

####--- new version with sub-pops ---####

## a simple model where independent sub-pop is infection derived from source pop
# if infected by true pop, need to state when diverged from past pop if that's the case
# Split times. OutOfafrica is divided by gen_time here; the other three are
# kept as the raw values (years, per the original note).
# NOTE(review): confirm the raw values are converted before they are used.
OutOfafrica = 62000/gen_time
Denisova_split = 350000
IntrogressingVindijaSplit = 90000
DenisovaNeanderthal = 420000

# Admixture parameters: time (years, per the original note) and admixture
# proportion (written as a fraction, 0.08).
Denisova_admix_time = 45000
DenisovaProportion = 0.08
admix_into = 1 # (1 for humans, 5 for neanderthals)


# Number of samples
n_ingroup = 2
African_samples = 1000
# Samples are listed per population index: 1000 from pop 0 and n_ingroup from
# pop 1 at time 0, then n_ingroup each from pops 3, 4 and 6 at earlier times.
samples = (
    [msp.Sample(population=0, time=0)] * African_samples
    + [msp.Sample(population=1, time=0)] * n_ingroup
    + [msp.Sample(population=3, time=80000/gen_time)] * n_ingroup
    + [msp.Sample(population=4, time=120000/gen_time)] * n_ingroup
    + [msp.Sample(population=6, time=60000/gen_time)] * n_ingroup
)

# One configuration per population; the position in the tuple is the
# population index (0 .. 6).
population_configurations = [
    msp.PopulationConfiguration(initial_size=size)
    for size in (
        Ne_Africa,               #0
        Ne_Europe,               #1
        Denisovasize,            #2
        Denisovasize,            #3
        Neanderthal_recentsize,  #4
        Neanderthal_recentsize,  #5
        Neanderthal_recentsize,  #6
    )
]


demographic_events_dict = {

コード例 #30
0
def neanderthal_admixture_model(num_eu=170,
                                num_as=394,
                                num_nean=1,
                                anc_time=900,
                                mix_time1=2000,
                                mix_time2=1000,
                                mix_time3=1000,
                                mix_time4=1000,
                                split_time_1=120000,
                                split_time_2=2300,
                                split_time_3=1500,
                                f1=0.022,
                                f2=0.00,
                                f3=0.00,
                                f4=0.00,
                                Ne0=10000,
                                Ne1=2500,
                                Ne2=10000,
                                mu=1.5e-8,
                                window_size=100000,
                                num_SNP=1,
                                num_rep=1,
                                coverage=False):
    """Simulate Neanderthal admixture and count introgressed lineages per window.

    Four populations are configured: 0 (EU, sampled), 1 (AS, sampled),
    2 (Basal Eurasian, not sampled) and 3 (Neanderthal, sampled at
    ``anc_time``). For each replicate the genome is scanned in windows of
    ``window_size``; at each window midpoint the tree covering it is used
    to climb from the last sample (the Neanderthal) to its highest
    ancestor younger than ``split_time_1``, and the EU / AS leaves below
    that ancestor are counted.

    The recombination map is read from "chr1_map_5000.txt" and the table
    of results is written to "outfile_map_chr1_1f.txt" (tab-separated,
    one window per line); both paths are hard coded.

    Parameters:
        num_eu, num_as, num_nean: number of samples from populations 0, 1, 3.
        anc_time: sampling time of the Neanderthal sample(s).
        mix_time1..mix_time4, f1..f4: times and proportions of the mass
            migration pulses (see the inline comments on each event).
        split_time_1, split_time_2, split_time_3: times of the
            proportion-1.0 mass migrations (population splits).
        Ne0: size of populations 0-2, also passed as ``Ne`` to simulate.
        Ne1: size of population 3.
        mu: mutation rate.
        window_size: window length used for the scan.
        num_rep: number of replicates.
        Ne2, num_SNP, coverage: accepted but not used by this function.

    Returns:
        Tuple of numpy arrays ``(win, freq_EU, freq_AS)``: window number,
        and EU / AS leaf counts for each recorded window.
    """
    infile = "chr1_map_5000.txt"
    rho_map = msp.RecombinationMap.read_hapmap(infile)
    samples = [msp.Sample(population=0, time=0)] * num_eu
    samples.extend([msp.Sample(population=1, time=0)] *
                   num_as)  #no sampling of Basal Eurasian pop
    samples.extend([msp.Sample(population=3, time=anc_time)] *
                   (num_nean))  #sample 1 Neanderthal for comparison
    pop_config = [
        msp.PopulationConfiguration(initial_size=Ne0),
        msp.PopulationConfiguration(initial_size=Ne0),
        msp.PopulationConfiguration(initial_size=Ne0),
        msp.PopulationConfiguration(initial_size=Ne1)
    ]
    divergence = [
        msp.MassMigration(time=mix_time4,
                          source=0,
                          destination=2,
                          proportion=f4),  #BE dilution into EU
        msp.MassMigration(time=mix_time3,
                          source=0,
                          destination=3,
                          proportion=f3),  #second pulse EU
        msp.MassMigration(time=mix_time2,
                          source=1,
                          destination=3,
                          proportion=f2),  #second pulse AS
        msp.MassMigration(time=split_time_3,
                          source=0,
                          destination=1,
                          proportion=1.0),  #EU AS split
        msp.MassMigration(time=mix_time1,
                          source=1,
                          destination=3,
                          proportion=f1),  #first pulse
        msp.MassMigration(time=split_time_2,
                          source=1,
                          destination=2,
                          proportion=1.0),  #BE AS split
        msp.MassMigration(time=split_time_1,
                          source=3,
                          destination=2,
                          proportion=1.0)
    ]  # Neand AS split
    sims = msp.simulate(samples=samples,
                        Ne=Ne0,
                        population_configurations=pop_config,
                        demographic_events=divergence,
                        mutation_rate=mu,
                        recombination_map=rho_map,
                        num_replicates=num_rep)
    print("done simulating")
    win = []
    freq_EU = []
    freq_AS = []
    last = np.array(rho_map.get_positions()[-1])
    # The very last leaf is the Neanderthal; when adding more modern pops
    # make sure the Neanderthal sample is still last.
    neanderthal_leaf = len(samples) - 1
    for cur_sim, sim in enumerate(sims, 1):
        cur_win = 1
        cur_start = 0
        cur_end = window_size - 1
        cur_site = (cur_start + cur_end) / 2.0
        print("current simulation")
        print(cur_sim)
        # The interactive debugging loop that stepped through every tree
        # with raw_input() was removed: it exhausted the tree iterator and
        # ended in StopIteration, so the scan below was never reached.
        # NOTE(review): each tree is tested against a single window midpoint,
        # so a tree spanning several midpoints records only the first one and
        # the scan then stalls for this replicate — confirm trees are always
        # shorter than window_size for the maps used.
        for tree in sim.trees():
            F_int = tree.get_interval()
            if F_int[0] <= cur_site < F_int[1]:
                # climb to the highest ancestor younger than split_time_1
                cur_node = neanderthal_leaf
                while tree.get_time(tree.get_parent(cur_node)) < split_time_1:
                    cur_node = tree.get_parent(cur_node)
                N_freq_EU = 0
                N_freq_AS = 0
                for leaf in tree.leaves(cur_node):
                    leaf_pop = tree.get_population(leaf)
                    if leaf_pop == 0:
                        N_freq_EU += 1
                    elif leaf_pop == 1:
                        N_freq_AS += 1
                win.append(cur_win)
                freq_EU.append(N_freq_EU)
                freq_AS.append(N_freq_AS)
                # advance to the next window
                cur_start += window_size
                cur_end += window_size
                print(cur_end)
                print(last)
                if cur_end > last:
                    break
                cur_win += 1
                print(cur_win)
                cur_site = (cur_start +
                            cur_end) / 2.0  #random.randint(cur_start,cur_end)
    # One tab-separated row per window, each terminated by a newline (the
    # row terminator had been commented out together with the length column).
    with open('outfile_map_chr1_1f.txt', 'w') as outfile:
        outfile.write("window\tfrequency_EU\tfrequency_AS")
        outfile.write('\n')
        for row in zip(win, freq_EU, freq_AS):
            outfile.write('\t'.join(str(value) for value in row))
            outfile.write('\n')
    return np.array(win), np.array(freq_EU), np.array(
        freq_AS)  #, np.array(leng)