Ejemplo n.º 1
0
 def test_alphabet_binary(self):
     """Mutate with the BINARY alphabet and hand the result to
     ``verify_binary_alphabet``."""
     seed = 2
     base_ts = msprime.simulate(10, random_seed=seed)
     mutated_ts = msprime.mutate(
         base_ts,
         rate=1,
         random_seed=seed,
         model=msprime.InfiniteSites(msprime.BINARY),
     )
     self.verify_binary_alphabet(mutated_ts)
Ejemplo n.º 2
0
 def test_infinite_sites_acgt_n2(self):
     """Run ``self.verify`` on a two-sample simulation mutated with the
     NUCLEOTIDES alphabet."""
     seed = 1
     two_sample_ts = msprime.simulate(2, random_seed=seed)
     mutated_ts = msprime.mutate(
         two_sample_ts,
         rate=3,
         model=msprime.InfiniteSites(msprime.NUCLEOTIDES),
         random_seed=seed,
     )
     self.verify(mutated_ts)
Ejemplo n.º 3
0
 def test_simple_nucleotide(self):
     """Check that nucleotide mutations yield at least one site, then call
     ``self.verify`` with the extra arguments ``2`` and ``random_seed=3``."""
     base_ts = msprime.simulate(10, random_seed=2)
     mutated_ts = msprime.mutate(
         base_ts,
         rate=1,
         random_seed=2,
         model=msprime.InfiniteSites(msprime.NUCLEOTIDES),
     )
     # Guard against a vacuous check on a tree sequence without sites.
     self.assertGreater(mutated_ts.num_sites, 0)
     self.verify(mutated_ts, 2, random_seed=3)
Ejemplo n.º 4
0
 def test_alphabet_nucleotide(self):
     """Mutate with the NUCLEOTIDES alphabet and hand the result to
     ``verify_nucleotides_alphabet``."""
     seed = 2
     base_ts = msprime.simulate(10, random_seed=seed)
     mutated_ts = msprime.mutate(
         base_ts,
         rate=1,
         random_seed=seed,
         model=msprime.InfiniteSites(msprime.NUCLEOTIDES),
     )
     self.verify_nucleotides_alphabet(mutated_ts)
Ejemplo n.º 5
0
def _show_parsimony_example(genotypes, alleles, svg_path):
    """Map one set of sample genotypes onto a fixed 6-sample tree and draw it.

    The tree is the first tree of ``msprime.simulate(6, random_seed=42)``.
    The ancestral state and every inferred mutation are printed, and the tree
    is drawn to ``svg_path`` with each sample coloured by its allele.

    :param genotypes: One allele index per sample. A value of -1 (tskit's
        missing-data marker) picks ``alleles[-1]`` as the node colour, so the
        caller puts the colour for missing samples last in ``alleles``.
    :param alleles: Allele labels; they double as colour names for drawing.
    :param svg_path: Output path handed to ``tree.draw``.
    """
    tree = msprime.simulate(6, random_seed=42).first()
    node_colours = {j: alleles[g] for j, g in enumerate(genotypes)}
    ancestral_state, mutations = tree.map_mutations(genotypes, alleles)
    print("Ancestral state = ", ancestral_state)
    for mut in mutations:
        print(f"Mutation: node = {mut.node} derived_state = {mut.derived_state}")
    tree.draw(svg_path, node_colours=node_colours)


def _reinfer_sites_and_mutations():
    """Rebuild the site and mutation tables from the variants and compare.

    A 6-sample tree sequence is simulated and mutated with nucleotide
    alleles. Its sites and mutations are then re-derived with
    ``map_mutations``, asserted equal to the originals, and printed.
    """
    ts = msprime.simulate(6, random_seed=23)
    ts = msprime.mutate(
        ts, rate=3, model=msprime.InfiniteSites(msprime.NUCLEOTIDES), random_seed=2)

    # NOTE(review): the first tree is reused for every variant; simulate() is
    # called without a recombination rate, so presumably there is only one
    # tree -- confirm before reusing this with recombination.
    tree = ts.first()
    tables = ts.dump_tables()
    # Reinfer the sites and mutations from the variants.
    tables.sites.clear()
    tables.mutations.clear()
    for var in ts.variants():
        ancestral_state, mutations = tree.map_mutations(var.genotypes, var.alleles)
        tables.sites.add_row(var.site.position, ancestral_state=ancestral_state)
        # Parent ids are shifted by the number of mutation rows already in
        # the table before this site's mutations are appended.
        parent_offset = len(tables.mutations)
        for mutation in mutations:
            parent = mutation.parent
            if parent != tskit.NULL:
                parent += parent_offset
            tables.mutations.add_row(
                var.index, node=mutation.node, parent=parent,
                derived_state=mutation.derived_state)

    assert tables.sites == ts.tables.sites
    assert tables.mutations == ts.tables.mutations
    print(tables.sites)
    print(tables.mutations)


def parsimony():
    """Run the parsimony examples and write their three SVG figures.

    In order: a plain three-allele example, a round trip that re-infers a
    simulated tree sequence's sites and mutations, and two examples in which
    one sample has the genotype -1.
    """
    _show_parsimony_example(
        [0, 0, 0, 0, 1, 2],
        ["red", "blue", "green"],
        "_static/parsimony1.svg")

    _reinfer_sites_and_mutations()

    _show_parsimony_example(
        [-1, 0, 0, 0, 1, 2],
        ["red", "blue", "green", "white"],
        "_static/parsimony2.svg")

    _show_parsimony_example(
        [1, -1, 0, 0, 0, 0],
        ["red", "blue", "white"],
        "_static/parsimony3.svg")
Ejemplo n.º 6
0
 def test_simple_acgt(self):
     """Call ``self.verify`` once per allele tuple on a nucleotide-mutated
     tree sequence that has more than three sites."""
     seed = 3
     mutated_ts = msprime.mutate(
         msprime.simulate(5, random_seed=seed),
         rate=4,
         random_seed=seed,
         model=msprime.InfiniteSites(msprime.NUCLEOTIDES),
     )
     self.assertGreater(mutated_ts.num_sites, 3)
     # Each tuple contains A, C, G and T; two of them add extra entries.
     allele_sets = (
         tskit.ALLELES_ACGT,
         ("A", "C", "T", "G", "AAAAAAAAAAAAAA"),
         ("AA", "CC", "TT", "GG", "A", "C", "T", "G"),
     )
     for allele_set in allele_sets:
         self.verify(mutated_ts, allele_set)
Ejemplo n.º 7
0
 def test_missing_alleles(self):
     """Expect ``LibraryError`` from ``genotype_matrix`` and ``variants``
     for each of the listed allele tuples."""
     seed = 2
     mutated_ts = msprime.mutate(
         msprime.simulate(10, random_seed=seed),
         rate=4,
         random_seed=seed,
         model=msprime.InfiniteSites(msprime.NUCLEOTIDES),
     )
     self.assertGreater(mutated_ts.num_sites, 2)
     rejected_allele_sets = (
         tskit.ALLELES_01,
         ("A",),
         ("C", "T", "G"),
         ("AA", "C", "T", "G"),
         ("ACTG",),
     )
     for allele_set in rejected_allele_sets:
         with self.assertRaises(exceptions.LibraryError):
             mutated_ts.genotype_matrix(alleles=allele_set)
         with self.assertRaises(exceptions.LibraryError):
             # variants() is consumed with list() so the iteration runs.
             list(mutated_ts.variants(alleles=allele_set))
Ejemplo n.º 8
0
 def test_simple_acgt(self):
     """Compare variants decoded with the default alleles against variants
     decoded with ``tskit.ALLELES_ACGT`` and against the genotype matrix."""

     def as_haplotype(variant):
         # Spell the genotypes out as one string of allele labels.
         return "".join(variant.alleles[g] for g in variant.genotypes)

     seed = 2
     mutated_ts = msprime.mutate(
         msprime.simulate(10, random_seed=seed),
         rate=4,
         random_seed=seed,
         model=msprime.InfiniteSites(msprime.NUCLEOTIDES),
     )
     self.assertGreater(mutated_ts.num_sites, 2)
     acgt = tskit.ALLELES_ACGT
     genotype_rows = mutated_ts.genotype_matrix(alleles=acgt)
     paired_variants = itertools.zip_longest(
         mutated_ts.variants(), mutated_ts.variants(alleles=acgt))
     for default_var, acgt_var in paired_variants:
         self.assertEqual(acgt_var.alleles, acgt)
         self.assertEqual(default_var.site, acgt_var.site)
         self.assertEqual(as_haplotype(default_var), as_haplotype(acgt_var))
         self.assertTrue(
             np.array_equal(acgt_var.genotypes, genotype_rows[default_var.site.id]))
Ejemplo n.º 9
0
 def test_identical_seed_alphabets(self):
     """Mutating with the default model and with the NUCLEOTIDES model under
     the same seed gives matching site/mutation counts, site positions and
     first-mutation nodes."""
     seed = 2
     base_ts = msprime.simulate(10, random_seed=seed)
     binary = msprime.mutate(base_ts, rate=1, random_seed=seed)
     nucleotides = msprime.mutate(
         base_ts,
         rate=1,
         random_seed=seed,
         model=msprime.InfiniteSites(msprime.NUCLEOTIDES),
     )
     # Make sure the comparison below is not over empty tables.
     self.assertGreater(binary.num_sites, 0)
     self.assertGreater(binary.num_mutations, 0)
     self.assertEqual(binary.num_sites, nucleotides.num_sites)
     self.assertEqual(binary.num_mutations, nucleotides.num_mutations)
     site_pairs = zip(binary.sites(), nucleotides.sites())
     for binary_site, nucleotide_site in site_pairs:
         self.assertEqual(binary_site.position, nucleotide_site.position)
         self.assertEqual(
             binary_site.mutations[0].node, nucleotide_site.mutations[0].node)
Ejemplo n.º 10
0
 def test_simple_acgt(self):
     """Compare variants decoded with the default alleles against variants
     decoded with ``tskit.ALLELES_ACGT`` and against the genotype matrix."""

     def as_haplotype(variant):
         # Spell the genotypes out as one string of allele labels.
         return "".join(variant.alleles[g] for g in variant.genotypes)

     seed = 2
     mutated_ts = msprime.mutate(
         msprime.simulate(10, random_seed=seed),
         rate=4,
         random_seed=seed,
         model=msprime.InfiniteSites(msprime.NUCLEOTIDES),
     )
     assert mutated_ts.num_sites > 2
     acgt = tskit.ALLELES_ACGT
     genotype_rows = mutated_ts.genotype_matrix(alleles=acgt)
     paired_variants = itertools.zip_longest(
         mutated_ts.variants(), mutated_ts.variants(alleles=acgt))
     for default_var, acgt_var in paired_variants:
         assert acgt_var.alleles == acgt
         assert default_var.site == acgt_var.site
         assert as_haplotype(default_var) == as_haplotype(acgt_var)
         assert np.array_equal(
             acgt_var.genotypes, genotype_rows[default_var.site.id])
Ejemplo n.º 11
0
# Author's note: likewise, the vector of binned rates (NaN entries dropped).
y = v[~np.isnan(v)]

# Simulate the tree sequence.
# Author's note: use sample_size = 200 to end up with 100 diploid
# individuals in the VCF.
tree_sequence = msprime.simulate(
    sample_size=40,
    Ne=2.5 * 10**4,
    recombination_map=recomb_map,
)

# Collect the breakpoints into an array.
breakpoints = np.array(list(tree_sequence.breakpoints()))

tree_sequence = msprime.mutate(
    tree_sequence,
    rate=10**-8,
    model=msprime.InfiniteSites(alphabet=msprime.NUCLEOTIDES),
)

# Count breakpoints per bin and rescale the counts.
v, bin_edges = np.histogram(breakpoints, num_bins, density=None)
n = 40
x1 = np.arange(1, n, 1)  # integers 1 .. n - 1
x2 = np.full(shape=n - 1, fill_value=1)  # n - 1 ones
x3 = x2 / x1  # element-wise 1/1, 1/2, 1/3, ...
somme = x3.sum()  # sum of x3
# Author's note: this yields 4 * Ne * r times the bin size.
rho = v / somme
r = rho / (4 * (2.5 * 10**4) * 500)

# Plot both curves against the left edge of each bin.
plt.plot(bin_edges[:-1], y, label="recomb_map", color="blue")
plt.plot(bin_edges[:-1], r, label="simul_map", color="orange")
plt.legend()
# Check Ne: use the number of individuals in the tree sequence when the
# configured value differs from it.
if Ne != ts.num_individuals:
    print("ADJUSTING NE TO:", str(ts.num_individuals))
    Ne = int(ts.num_individuals)

# Recapitate!
print("\tRecapitating")
recap = ts.recapitate(recombination_rate=recomb, Ne=Ne, random_seed=timestamp)

print("\tAdding Mutations")
mutated = msprime.mutate(
    recap,
    rate=mu,
    random_seed=timestamp,
    keep=True,
    # Named constant instead of the bare integer 1 (nucleotide alphabet).
    model=msprime.InfiniteSites(alphabet=msprime.NUCLEOTIDES))

print("\tPrinting to file:", newfilename)
mutated.dump(newfilename)

print("\tCreating VCF")
with open(vcffilename, "w") as vcf_file:
    try:
        mutated.write_vcf(vcf_file)
    except Exception:
        # Best-effort fallback: retry on the simplified tree sequence.
        # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit through.
        print("\tSimplifying")
        # Discard whatever the failed attempt already wrote, so the file
        # holds only the simplified VCF.
        vcf_file.seek(0)
        vcf_file.truncate()
        simple = mutated.simplify()
        simple.write_vcf(vcf_file)
Ejemplo n.º 13
0
import msprime, sys

# parameters: six positional command-line arguments
if len(sys.argv) < 7:
    # Exit with a usage message instead of an IndexError traceback.
    sys.exit(
        "usage: " + sys.argv[0] + " SAMPLE_SIZE NE SEQUENCE_LENGTH "
        "RECOMBINATION_RATE MUTATION_RATE SEED")
sample_size = int(sys.argv[1])
effective_population_size = int(sys.argv[2])
sequence_length = int(sys.argv[3])
recombination_rate = float(sys.argv[4])
mutation_rate = float(sys.argv[5])
seed = int(sys.argv[6])

# run the simulation; mutations are added in a separate step so that the
# nucleotide alphabet can be selected
ts = msprime.simulate(sample_size=sample_size,
                      Ne=effective_population_size,
                      length=sequence_length,
                      recombination_rate=recombination_rate,
                      random_seed=seed)
model = msprime.InfiniteSites(msprime.NUCLEOTIDES)
ts = msprime.mutate(ts, rate=mutation_rate, model=model, random_seed=seed)

# print results
# Write to sys.stdout directly: wrapping it in `with` would close the
# process's standard output when the block exits.
# ploidy=2 is for diploid; author's note on "legacy": no matching positions.
ts.write_vcf(sys.stdout, ploidy=2, position_transform="legacy")
Ejemplo n.º 14
0
# Recapitate!
print("\tRecapitating")
recap = ts.recapitate(recombination_rate=recomb, Ne=Ne, random_seed=timestamp)
# The recapitated tree sequence is deliberately not written to disk.

print("\tAdding Mutations")
mutated = msprime.mutate(
    recap,
    rate=mu,
    random_seed=timestamp,
    keep=True,
    # Named constant instead of the bare integer 1 (nucleotide alphabet).
    model=msprime.InfiniteSites(alphabet=msprime.NUCLEOTIDES))
# Author's notes: this won't do nucleotides -- Kelleher and Ralph to fix;
# Ralph is receptive to questions about the Python side.
# Positions can't be rounded.

print("\tPrinting to file:",newfilename)
mutated.dump(newfilename)

print("\tCreating VCF")
with open(vcffilename, "w") as vcf_file:
    # The second argument of write_vcf is the ploidy; pass it by keyword.
    mutated.write_vcf(vcf_file, ploidy=2)
    
    # Plot the tree heights after recapitation
#     print("\tPlotting tree heights (mutated)")
#     breakpoints = list(recap.breakpoints())
#     heights = tree_heights(recap)
#     plt.step(breakpoints, heights, where='post')