def test_alphabet_binary(self):
    """Mutate a 10-sample simulation with the BINARY alphabet and check it.

    The mutated tree sequence is handed to ``self.verify_binary_alphabet``.
    """
    base_ts = msprime.simulate(10, random_seed=2)
    binary_model = msprime.InfiniteSites(msprime.BINARY)
    mutated = msprime.mutate(
        base_ts,
        rate=1,
        random_seed=2,
        model=binary_model,
    )
    self.verify_binary_alphabet(mutated)
def test_infinite_sites_acgt_n2(self):
    """Run ``self.verify`` on a two-sample simulation with nucleotide mutations."""
    unmutated = msprime.simulate(2, random_seed=1)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    mutated = msprime.mutate(
        unmutated,
        rate=3,
        model=nucleotide_model,
        random_seed=1,
    )
    self.verify(mutated)
def test_simple_nucleotide(self):
    """Verify a 10-sample simulation carrying nucleotide mutations.

    The test first makes sure at least one site was generated, then calls
    ``self.verify`` with ``2`` and ``random_seed=3``.
    """
    base_ts = msprime.simulate(10, random_seed=2)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    ts = msprime.mutate(
        base_ts,
        rate=1,
        random_seed=2,
        model=nucleotide_model,
    )
    self.assertGreater(ts.num_sites, 0)
    self.verify(ts, 2, random_seed=3)
def test_alphabet_nucleotide(self):
    """Mutate a 10-sample simulation with the NUCLEOTIDES alphabet and check it.

    The mutated tree sequence is handed to ``self.verify_nucleotides_alphabet``.
    """
    base_ts = msprime.simulate(10, random_seed=2)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    mutated = msprime.mutate(
        base_ts,
        rate=1,
        random_seed=2,
        model=nucleotide_model,
    )
    self.verify_nucleotides_alphabet(mutated)
def parsimony():
    """Run the ``Tree.map_mutations`` examples and draw the resulting trees.

    Three small examples map hand-written genotypes onto the first tree of a
    six-sample simulation, print the ancestral state and mutations, and save
    an SVG of the coloured tree. In between, the sites and mutations of a
    simulated tree sequence are re-inferred from its variants and asserted
    to equal the original tables.
    """

    def map_and_draw(alleles, genotypes, svg_path):
        # Map ``genotypes`` onto the first tree of the seed-42 simulation,
        # print what was inferred, and draw the tree with each node coloured
        # by the allele at its genotype index (an index of -1 picks the last
        # allele in the list).
        tree = msprime.simulate(6, random_seed=42).first()
        node_colours = {}
        for node, allele_index in enumerate(genotypes):
            node_colours[node] = alleles[allele_index]
        ancestral_state, mutations = tree.map_mutations(genotypes, alleles)
        print("Ancestral state = ", ancestral_state)
        for mut in mutations:
            print(f"Mutation: node = {mut.node} derived_state = {mut.derived_state}")
        tree.draw(svg_path, node_colours=node_colours)

    # Example 1: every sample has a known allele.
    map_and_draw(
        ["red", "blue", "green"],
        [0, 0, 0, 0, 1, 2],
        "_static/parsimony1.svg",
    )

    # Re-infer the sites and mutations of a simulated tree sequence from
    # its variants.
    ts = msprime.simulate(6, random_seed=23)
    ts = msprime.mutate(
        ts,
        rate=3,
        model=msprime.InfiniteSites(msprime.NUCLEOTIDES),
        random_seed=2,
    )
    tree = ts.first()
    tables = ts.dump_tables()
    tables.sites.clear()
    tables.mutations.clear()
    for var in ts.variants():
        ancestral_state, inferred = tree.map_mutations(var.genotypes, var.alleles)
        tables.sites.add_row(var.site.position, ancestral_state=ancestral_state)
        # The returned parent IDs are shifted by the number of rows already
        # in the mutation table before being stored.
        parent_offset = len(tables.mutations)
        for mutation in inferred:
            if mutation.parent == tskit.NULL:
                parent = mutation.parent
            else:
                parent = mutation.parent + parent_offset
            tables.mutations.add_row(
                var.index,
                node=mutation.node,
                parent=parent,
                derived_state=mutation.derived_state,
            )

    assert tables.sites == ts.tables.sites
    assert tables.mutations == ts.tables.mutations
    print(tables.sites)
    print(tables.mutations)

    # Example 2: the first sample has genotype -1.
    map_and_draw(
        ["red", "blue", "green", "white"],
        [-1, 0, 0, 0, 1, 2],
        "_static/parsimony2.svg",
    )

    # Example 3: the second sample has genotype -1.
    map_and_draw(
        ["red", "blue", "white"],
        [1, -1, 0, 0, 0, 0],
        "_static/parsimony3.svg",
    )
def test_simple_acgt(self):
    """Run ``self.verify`` with several allele tuples that all contain A, C, G and T."""
    base_ts = msprime.simulate(5, random_seed=3)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    ts = msprime.mutate(base_ts, rate=4, random_seed=3, model=nucleotide_model)
    self.assertGreater(ts.num_sites, 3)

    # The standard tuple, one with an extra allele, and one with extra
    # leading alleles.
    allele_sets = (
        tskit.ALLELES_ACGT,
        ("A", "C", "T", "G", "AAAAAAAAAAAAAA"),
        ("AA", "CC", "TT", "GG", "A", "C", "T", "G"),
    )
    for allele_set in allele_sets:
        self.verify(ts, allele_set)
def test_missing_alleles(self):
    """Check that unsuitable ``alleles`` arguments raise ``LibraryError``.

    Each example is passed to both ``genotype_matrix`` and ``variants`` for
    a tree sequence mutated with the nucleotide alphabet.
    """
    base_ts = msprime.simulate(10, random_seed=2)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    ts = msprime.mutate(base_ts, rate=4, random_seed=2, model=nucleotide_model)
    self.assertGreater(ts.num_sites, 2)

    rejected = (
        tskit.ALLELES_01,
        ("A",),
        ("C", "T", "G"),
        ("AA", "C", "T", "G"),
        ("ACTG",),
    )
    for candidate in rejected:
        with self.assertRaises(exceptions.LibraryError):
            ts.genotype_matrix(alleles=candidate)
        with self.assertRaises(exceptions.LibraryError):
            # The variants are consumed with list() inside the context.
            list(ts.variants(alleles=candidate))
def test_simple_acgt(self):
    """Compare default variants with variants decoded against ``ALLELES_ACGT``.

    For every site the two variants must refer to the same site and spell
    the same allele string, and the fixed-allele genotypes must equal the
    matching row of the genotype matrix.
    """
    base_ts = msprime.simulate(10, random_seed=2)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    ts = msprime.mutate(base_ts, rate=4, random_seed=2, model=nucleotide_model)
    self.assertGreater(ts.num_sites, 2)

    alleles = tskit.ALLELES_ACGT
    G = ts.genotype_matrix(alleles=alleles)

    def spelled_out(variant):
        # Join the allele string of every genotype in the variant.
        return "".join([variant.alleles[g] for g in variant.genotypes])

    default_variants = ts.variants()
    acgt_variants = ts.variants(alleles=alleles)
    for v1, v2 in itertools.zip_longest(default_variants, acgt_variants):
        self.assertEqual(v2.alleles, alleles)
        self.assertEqual(v1.site, v2.site)
        self.assertEqual(spelled_out(v1), spelled_out(v2))
        self.assertTrue(np.array_equal(v2.genotypes, G[v1.site.id]))
def test_identical_seed_alphabets(self):
    """Compare default-model and nucleotide mutations drawn with one seed.

    Both mutated tree sequences must have the same number of sites and
    mutations, and matching sites must share a position and the node of
    their first mutation.
    """
    base_ts = msprime.simulate(10, random_seed=2)
    # No model is passed here; the original code names the result "binary".
    binary = msprime.mutate(base_ts, rate=1, random_seed=2)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    nucleotides = msprime.mutate(
        base_ts,
        rate=1,
        random_seed=2,
        model=nucleotide_model,
    )

    self.assertGreater(binary.num_sites, 0)
    self.assertGreater(binary.num_mutations, 0)
    self.assertEqual(binary.num_sites, nucleotides.num_sites)
    self.assertEqual(binary.num_mutations, nucleotides.num_mutations)

    site_pairs = zip(binary.sites(), nucleotides.sites())
    for binary_site, nucleotide_site in site_pairs:
        self.assertEqual(binary_site.position, nucleotide_site.position)
        self.assertEqual(
            binary_site.mutations[0].node,
            nucleotide_site.mutations[0].node,
        )
def test_simple_acgt(self):
    """Compare default variants with variants decoded against ``ALLELES_ACGT``.

    For every site the two variants must refer to the same site and spell
    the same allele string, and the fixed-allele genotypes must equal the
    matching row of the genotype matrix.
    """
    base_ts = msprime.simulate(10, random_seed=2)
    nucleotide_model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
    ts = msprime.mutate(base_ts, rate=4, random_seed=2, model=nucleotide_model)
    assert ts.num_sites > 2

    alleles = tskit.ALLELES_ACGT
    G = ts.genotype_matrix(alleles=alleles)

    def spelled_out(variant):
        # Join the allele string of every genotype in the variant.
        return "".join([variant.alleles[g] for g in variant.genotypes])

    default_variants = ts.variants()
    acgt_variants = ts.variants(alleles=alleles)
    for v1, v2 in itertools.zip_longest(default_variants, acgt_variants):
        assert v2.alleles == alleles
        assert v1.site == v2.site
        assert spelled_out(v1) == spelled_out(v2)
        assert np.array_equal(v2.genotypes, G[v1.site.id])
# Same again: the vector of binned rates, with NaN entries removed.
y = v[~np.isnan(v)]

# Simulate the tree sequence.
tree_sequence = msprime.simulate(
    # Use sample_size=200 to end up with 100 diploid individuals in the VCF.
    sample_size=40,
    Ne=2.5 * 10**4,
    recombination_map=recomb_map,
)

# Collect the breakpoints of the simulated tree sequence.
breakpoints = np.array([*tree_sequence.breakpoints()])

nucleotide_model = msprime.InfiniteSites(alphabet=msprime.NUCLEOTIDES)
tree_sequence = msprime.mutate(
    tree_sequence,
    rate=10**-8,
    model=nucleotide_model,
)

# Count the breakpoints falling in each bin.
v, bin_edges = np.histogram(breakpoints, num_bins, density=None)

n = 40
x1 = np.arange(1, n)            # the integers 1 .. n-1
x2 = np.ones(n - 1, dtype=int)  # n-1 ones
x3 = x2 / x1                    # 1/1, 1/2, 1/3, ...
somme = x3.sum()                # sum of x3
# Original note: this gives 4*Ne*r times the bin size.
rho = v / somme
# NOTE(review): 500 is presumably the bin size - confirm.
r = rho / (4 * (2.5 * 10**4) * 500)

# Plot the input map against the rates recovered from the simulation.
x_positions = bin_edges[:-1]
plt.plot(x_positions, y, label="recomb_map", color="blue")
plt.plot(x_positions, r, label="simul_map", color="orange")
plt.legend()
# Check Ne: when it differs from the number of individuals in the tree
# sequence, override it with that number and report the change.
if Ne != ts.num_individuals:
    print("ADJUSTING NE TO:", ts.num_individuals)
    Ne = int(ts.num_individuals)

# Recapitate!
print("\tRecapitating")
recap = ts.recapitate(recombination_rate=recomb, Ne=Ne, random_seed=timestamp)

print("\tAdding Mutations")
mutated = msprime.mutate(
    recap,
    rate=mu,
    random_seed=timestamp,
    keep=True,
    # The original comment labels alphabet=1 as "nucleotides".
    model=msprime.InfiniteSites(alphabet=1),
)

print("\tPrinting to file:", newfilename)
mutated.dump(newfilename)

print("\tCreating VCF")
with open(vcffilename, "w") as vcf_file:
    try:
        # Alternatives tried previously:
        #   mutated.write_vcf(vcf_file, ploidy=2)
        #   mutated.write_vcf(mutated.individuals_alive_at(0))
        mutated.write_vcf(vcf_file)
    except Exception:
        # Best-effort fallback: retry on the simplified tree sequence.
        # Catch Exception rather than everything so that Ctrl-C and
        # SystemExit still stop the script.
        print("\tSimplifying")
        # Drop whatever the failed attempt already wrote, so the file
        # holds only the simplified VCF.
        vcf_file.seek(0)
        vcf_file.truncate()
        simple = mutated.simplify()
        simple.write_vcf(vcf_file)
"""Simulate a tree sequence with nucleotide mutations and print it as VCF.

Positional command-line arguments, in order: sample size, effective
population size, sequence length, recombination rate, mutation rate and
random seed. The VCF is written to standard output.
"""
import sys

import msprime

# parameters
if len(sys.argv) < 7:
    sys.exit(
        "usage: {} SAMPLE_SIZE NE SEQUENCE_LENGTH RECOMBINATION_RATE "
        "MUTATION_RATE SEED".format(sys.argv[0])
    )
sample_size = int(sys.argv[1])
effective_population_size = int(sys.argv[2])
sequence_length = int(sys.argv[3])
recombination_rate = float(sys.argv[4])
mutation_rate = float(sys.argv[5])
seed = int(sys.argv[6])

# run the simulation
# Mutations are not generated by simulate(); they are added afterwards with
# mutate() so that the nucleotide alphabet can be used.
ts = msprime.simulate(
    sample_size=sample_size,
    Ne=effective_population_size,
    length=sequence_length,
    recombination_rate=recombination_rate,
    random_seed=seed,
)
model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
ts = msprime.mutate(ts, rate=mutation_rate, model=model, random_seed=seed)

# print results
# Write straight to stdout instead of using it as a context manager, which
# would close the interpreter's standard output stream.
# ploidy=2 is for diploid; "legacy" = no matching positions.
ts.write_vcf(sys.stdout, ploidy=2, position_transform="legacy")
sys.stdout.flush()
# Recapitate!
print("\tRecapitating")
recap = ts.recapitate(
    recombination_rate=recomb,
    Ne=Ne,
    random_seed=timestamp,
)

# The recapitated tree sequence is deliberately not written out:
# recap.dump("recipe_16.10_recap.trees")

# Plot the tree heights after recapitation
# print("\tPlotting tree heights (after): "+"After_Recap_"+filename+".png")
# breakpoints = list(recap.breakpoints())
# heights = tree_heights(recap)
# plt.step(breakpoints, heights, where='post')
# plt.savefig("After_Recap_"+filename+".png")

print("\tAdding Mutations")
# Original author's notes: alphabet=1 is meant to be nucleotides; this
# "wont do nucleotides" yet (Kelleher and Ralph to fix; Ralph is receptive
# to questions about the Python side), and positions cannot be rounded.
mutation_model = msprime.InfiniteSites(alphabet=1)
mutated = msprime.mutate(
    recap,
    rate=mu,
    random_seed=timestamp,
    keep=True,
    model=mutation_model,
)

print("\tPrinting to file:", newfilename)
mutated.dump(newfilename)

print("\tCreating VCF")
with open(vcffilename, "w") as vcf_file:
    # The second positional argument is presumably the ploidy - confirm.
    mutated.write_vcf(vcf_file, 2)

# Plot the tree heights after recapitation
# print("\tPlotting tree heights (mutated)")
# breakpoints = list(recap.breakpoints())
# heights = tree_heights(recap)
# plt.step(breakpoints, heights, where='post')