def init():
    """Import Pyvolve and build the nucleotide model stored in ``GC.pyvolve_model``.

    Reads ``GC.nuc_kappa``, ``GC.nuc_frequencies_dictionary`` and
    ``GC.nuc_mutation_rates_dictionary``. Every non-empty setting is forwarded
    to ``pyvolve.Model("nucleotide", ...)`` under the keys ``kappa``,
    ``state_freqs`` and ``mu``; with no custom settings the default
    nucleotide model is created.

    Raises:
        AssertionError: if Pyvolve cannot be imported, a dictionary setting is
            not a dict, a frequency key is not A/C/G/T, the frequencies do not
            sum to 1, or both kappa and custom mutation rates are supplied.
    """
    try:
        global pyvolve
        import pyvolve
    except Exception:
        from os import chdir
        chdir(GC.START_DIR)
        # Raised explicitly so the failure is not stripped under ``python -O``.
        raise AssertionError("Error loading Pyvolve. Install with: pip3 install pyvolve")

    # config validity checks
    custom_model_params = {}
    if isinstance(GC.nuc_kappa, str):
        GC.nuc_kappa = GC.nuc_kappa.strip()
        # An empty string means "no custom kappa".
        if len(GC.nuc_kappa) != 0:
            custom_model_params['kappa'] = float(GC.nuc_kappa)
    else:
        custom_model_params['kappa'] = float(GC.nuc_kappa)

    assert isinstance(GC.nuc_frequencies_dictionary, dict), "Specified nuc_frequencies_dictionary is not a dictionary"
    if len(GC.nuc_frequencies_dictionary) != 0:
        for key in GC.nuc_frequencies_dictionary:
            assert key in {'A', 'C', 'G', 'T'}, "%s is not a valid nucleotide for nuc_frequencies_dictionary. Only DNA nucleotides (A, C, G, or T)" % key
        assert abs(sum(GC.nuc_frequencies_dictionary.values()) - 1) < 0.000000001, "Frequencies in nuc_frequencies_dictionary must sum to 1"
        # NOTE(review): the dict itself is handed to Pyvolve as state_freqs;
        # confirm Pyvolve accepts a mapping here rather than an ordered list.
        custom_model_params['state_freqs'] = GC.nuc_frequencies_dictionary

    assert isinstance(GC.nuc_mutation_rates_dictionary, dict), "Specified nuc_mutation_rates_dictionary is not a dictionary"
    if len(GC.nuc_mutation_rates_dictionary) != 0:
        custom_model_params['mu'] = GC.nuc_mutation_rates_dictionary
    # The custom rates are stored under 'mu', so that is the key to test.
    assert not ('kappa' in custom_model_params and 'mu' in custom_model_params), "Cannot use custom values for both nuc_kappa and nuc_mutation_rates_dictionary: only one of the two"

    # set up Pyvolve
    if len(custom_model_params) == 0:
        GC.pyvolve_model = pyvolve.Model("nucleotide")
    else:
        GC.pyvolve_model = pyvolve.Model("nucleotide", custom_model_params)
def init():
    """Import Pyvolve and build the ECM codon model stored in ``GC.pyvolve_model``.

    ``GC.ecm_type`` must be ``"restricted"`` or ``"unrestricted"`` and is
    rewritten in place to the Pyvolve model name (``ECMrest`` / ``ECMunrest``).
    ``GC.ecm_alpha``, ``GC.ecm_beta`` and ``GC.ecm_omega`` are forwarded as
    ``alpha`` / ``beta`` / ``omega`` unless they are empty strings, and a
    non-empty ``GC.ecm_codon_frequencies_dictionary`` is forwarded as
    ``state_freqs``.

    Raises:
        AssertionError: if Pyvolve cannot be imported, ``ecm_type`` is not
            recognised, the frequency setting is not a dict, a key is not a
            sense codon, or the frequencies do not sum to 1.
    """
    try:
        global pyvolve
        import pyvolve
    except Exception:
        from os import chdir
        chdir(GC.START_DIR)
        # Raised explicitly so the failure is not stripped under ``python -O``.
        raise AssertionError("Error loading Pyvolve. Install with: pip3 install pyvolve")

    # config validity checks
    custom_model_params = {}
    GC.ecm_type = GC.ecm_type.strip()
    pyvolve_names = {'restricted': 'ECMrest', 'unrestricted': 'ECMunrest'}
    if GC.ecm_type not in pyvolve_names:
        raise AssertionError('ecm_type must be "restricted" or "unrestricted"')
    GC.ecm_type = pyvolve_names[GC.ecm_type]

    # alpha, beta and omega share one rule: strings are stripped (and written
    # back to GC), an empty string means "not set", anything else is a float.
    for name in ('alpha', 'beta', 'omega'):
        attr = 'ecm_' + name
        value = getattr(GC, attr)
        if isinstance(value, str):
            value = value.strip()
            setattr(GC, attr, value)
            if len(value) != 0:
                custom_model_params[name] = float(value)
        else:
            custom_model_params[name] = float(value)

    assert isinstance(GC.ecm_codon_frequencies_dictionary, dict), "Specified ecm_codon_frequencies_dictionary is not a dictionary"
    if len(GC.ecm_codon_frequencies_dictionary) != 0:
        codons = set(GC.generate_all_kmers(3, 'ACGT'))
        codons.difference_update({'TGA', 'TAA', 'TAG'})  # remove STOP codons
        for key in GC.ecm_codon_frequencies_dictionary:
            assert key in codons, "%s is not a valid codon for ecm_codon_frequencies_dictionary. Only include 3-mers of the DNA alphabet, excluding the STOP codons (TGA, TAA, and TAG)" % key
        assert abs(sum(GC.ecm_codon_frequencies_dictionary.values()) - 1) < 0.000000001, "Frequencies in ecm_codon_frequencies_dictionary must sum to 1"
        custom_model_params['state_freqs'] = GC.ecm_codon_frequencies_dictionary

    # set up Pyvolve
    if len(custom_model_params) == 0:
        GC.pyvolve_model = pyvolve.Model(GC.ecm_type)
    else:
        GC.pyvolve_model = pyvolve.Model(GC.ecm_type, custom_model_params)
def evolve(newicks, sequence_size, scale_tree):
    """Simulate one alignment per Newick string and concatenate them.

    Each tree is evolved under the default Pyvolve nucleotide model, written
    to ``temp<i>.fasta``, converted with ``fasta_to_phyl`` to ``temp<i>.phyl``
    and the FASTA file removed. The per-tree PHYLIP files are then appended,
    each followed by a newline, to ``temp_seq.phyl`` and deleted.

    Args:
        newicks: sequence of Newick tree strings.
        sequence_size: number of positions handed to ``pyvolve.Partition``.
        scale_tree: value passed through to ``pyvolve.read_tree``.

    Returns:
        The name of the combined file, ``"temp_seq.phyl"``.
    """
    nucleotide_model = pyvolve.Model("nucleotide")
    partition = pyvolve.Partition(models=nucleotide_model, size=sequence_size)

    phylip_parts = []
    for index, newick in enumerate(newicks):
        fasta_name = "temp" + str(index) + ".fasta"
        phylip_name = "temp" + str(index) + ".phyl"
        phylogeny = pyvolve.read_tree(tree=newick, scale_tree=scale_tree)
        evolver = pyvolve.Evolver(tree=phylogeny, partitions=partition)
        phylip_parts.append(phylip_name)
        evolver(seqfile=fasta_name, seqfmt="fasta", ratefile=None, infofile=None)
        fasta_to_phyl(fasta_name, phylip_name)
        os.remove(fasta_name)

    phyl_output = "temp_seq.phyl"
    with open(phyl_output, 'w') as combined:
        for part_name in phylip_parts:
            with open(part_name) as part:
                combined.write(part.read())
            combined.write("\n")
            os.remove(part_name)
    return phyl_output
def get_random_tree(filename, tree_string, L, kappa):
    """Evolve sequences along ``tree_string`` and return their pi and theta.

    A root sequence from ``generate_ancestor(L)`` is printed and then evolved
    with a Pyvolve nucleotide model using the given ``kappa`` and equal base
    frequencies. The simulated tip sequences are summarised by ``pi_value``
    and ``theta_value``.

    Args:
        filename: not used by this function.
        tree_string: Newick string describing the phylogeny.
        L: argument forwarded to ``generate_ancestor``.
        kappa: kappa parameter of the nucleotide model.

    Returns:
        ``{'pi': ..., 'theta': ...}``.
    """
    phylogeny = pyvolve.read_tree(tree=tree_string)
    equal_freqs = [0.25, 0.25, 0.25, 0.25]
    model_params = {'kappa': kappa, 'state_freqs': equal_freqs}
    nuc_model = pyvolve.Model('nucleotide', model_params)

    ancestor = generate_ancestor(L)
    print(ancestor)

    partition = pyvolve.Partition(models=nuc_model, root_sequence=ancestor)
    evolver = pyvolve.Evolver(partitions=partition, tree=phylogeny)
    evolver()

    simulated_strains = evolver.get_sequences()
    summary = {}
    summary['pi'] = pi_value(simulated_strains)
    summary['theta'] = theta_value(simulated_strains)
    return summary
def get_random_tree(L, species, scaled_tree_string, kappa, iteration):
    """Evolve sequences along a scaled tree, save them, and return (pi, theta).

    The tree is parsed and printed, a root sequence is taken from
    ``generate_ancestor(L)``, and sequences are evolved under a Pyvolve
    nucleotide model with the given ``kappa`` and equal base frequencies.
    The alignment is written to
    ``simulated_alignment_<species[:-1]>_universal_<iteration + 1>.fasta``;
    no rate or info file is written. Progress messages go to stdout.

    Args:
        L: argument forwarded to ``generate_ancestor``.
        species: label whose last element is dropped for the file name.
        scaled_tree_string: Newick string describing the phylogeny.
        kappa: kappa parameter of the nucleotide model.
        iteration: zero-based counter; ``iteration + 1`` goes in the file name.

    Returns:
        Tuple ``(pi, theta)`` from ``pi_value`` and ``theta_value``.
    """
    phylogeny = pyvolve.read_tree(tree=scaled_tree_string)
    print('read in the tree')
    pyvolve.print_tree(phylogeny)

    equal_freqs = [0.25, 0.25, 0.25, 0.25]
    nuc_model = pyvolve.Model('nucleotide',
                              {'kappa': kappa, 'state_freqs': equal_freqs})

    ancestor = generate_ancestor(L)
    print('generated an ancestor')

    partition = pyvolve.Partition(models=nuc_model, root_sequence=ancestor)
    evolver = pyvolve.Evolver(partitions=partition, tree=phylogeny)
    alignment_name = ("simulated_alignment_" + str(species[:-1]) +
                      "_universal_" + str(iteration + 1) + ".fasta")
    evolver(ratefile=None, infofile=None, seqfile=alignment_name)
    print('evolved the sequences')

    simulated_strains = evolver.get_sequences()
    return pi_value(simulated_strains), theta_value(simulated_strains)
def get_codon_model(self):
    """Build a Pyvolve ``MutSel`` model from ``Input/codon_fitness.txt``.

    The file is read relative to the current working directory, one fitness
    value per line; blank lines are ignored.

    Returns:
        ``pyvolve.Model("MutSel", {"fitness": [...]})``.

    Raises:
        KeyError: if ``ALPHA``, ``BETA`` or ``KAPPA`` is missing from
            ``self.parameters``.
        ValueError: if one of those parameters, or a non-blank line of the
            fitness file, is not a valid float.
    """
    # Parsed for validation only: a missing or non-numeric ALPHA/BETA/KAPPA
    # still raises here, but the values are not passed to the model below.
    codon_params = {}
    for param in ["ALPHA", "BETA", "KAPPA"]:
        codon_params[param.lower()] = float(self.parameters[param])

    # The context manager closes the handle even if a line fails to parse.
    with open("Input/codon_fitness.txt", "r") as codon_fitness_file:
        codon_fitness = [
            float(line) for line in codon_fitness_file if line.strip()
        ]
    return pyvolve.Model("MutSel", {"fitness": codon_fitness})
def simulate(f, seqfile, tree, mu_dict, length):
    """Simulate a single partition under a homogeneous mutation-selection model.

    Args:
        f: state frequencies passed to the model as ``state_freqs``.
        seqfile: path the simulated alignment is written to.
        tree: either a path to a Newick file or a Newick string; it is first
            tried as a file and, if that fails, parsed as a string.
        mu_dict: mutation rates passed to the model as ``mu``.
        length: number of positions in the partition.
    """
    try:
        my_tree = pyvolve.read_tree(file=tree)
    except Exception:
        # Not a readable tree file: treat the argument as a Newick string.
        # ``Exception`` (not a bare except) lets Ctrl-C and SystemExit through.
        my_tree = pyvolve.read_tree(tree=tree)

    model = pyvolve.Model("MutSel", {'state_freqs': f, 'mu': mu_dict})
    part = pyvolve.Partition(size=length, models=model)
    e = pyvolve.Evolver(partitions=part, tree=my_tree)
    e(seqfile=seqfile, ratefile=None, infofile=None)
def get_accurate_c(L, kappa):
    """Evolve a fixed six-tip tree and set up convergent-site bookkeeping.

    NOTE(review): this definition appears to be truncated in the file. It
    ends after initialising its bookkeeping structures, has no ``return``,
    and its last statement is a bare ``strain_names`` expression. It is
    reproduced as found.

    Args:
        L: forwarded to ``generate_ancestor``; also the number of per-site
            bookkeeping entries created.
        kappa: not used; the model below uses a hard-coded kappa.
            NOTE(review): confirm whether that is intentional.
    """
    ancestor = generate_ancestor(L)
    print(ancestor)

    phylogeny = pyvolve.read_tree(
        tree='( ((t7:0.5,t8:0.5)i4:0.5,(t5:0.5,t6:0.5)i3:0.5)i1:0.5, (t3:0.5,t4:0.5)i2:0.5 ) root;'
    )
    pyvolve.print_tree(phylogeny)

    freqs = [0.25, 0.25, 0.25, 0.25]
    model_params = {'kappa': 1.86836732388, 'state_freqs': freqs}
    nuc_model = pyvolve.Model('nucleotide', model_params)
    my_partition = pyvolve.Partition(models=nuc_model, root_sequence=ancestor)
    my_evolver = pyvolve.Evolver(partitions=my_partition, tree=phylogeny)
    my_evolver(write_anc=True)

    # Ancestral sequences are included alongside the tips.
    strains = my_evolver.get_sequences(anc=True)
    strain_names = list(strains.keys())  # pre-order traversal of the tree
    n = len(strain_names)
    print(strain_names)

    # One (initially empty) list per strain.
    c_sites = {name: [] for name in strain_names}
    # Per site: how many strains carry each nucleotide at a convergent site.
    site_counts = [{'A': 0, 'T': 0, 'G': 0, 'C': 0} for _ in range(L)]
    # Per site: the strains that have a convergent mutation there.
    strains_with_site = [[] for _ in range(L)]

    c = 0
    strain_names
def get_nucleotide_model(self):
    """Build a Pyvolve nucleotide model from ``self.parameters``.

    Equilibrium frequencies are read from the keys ``A``, ``C``, ``G``, ``T``
    and the twelve directed mutation rates from the two-letter keys
    (``AC``, ``AG``, ... ``TG``); every value is converted with ``float``.

    Returns:
        ``pyvolve.Model("nucleotide", {"mu": ..., "state_freqs": ...})``.

    Raises:
        KeyError: if a required parameter is missing.
        AssertionError: if the frequencies do not sum to 1 within 1e-6.
    """
    params = self.parameters
    bases = ('A', 'C', 'G', 'T')
    equilibrium = []
    rates = {}
    for base in bases:
        equilibrium.append(float(params[base]))
        # Rates out of ``base`` toward every other nucleotide.
        rates.update(
            (base + other, float(params[base + other]))
            for other in bases
            if other != base
        )

    deviation = abs(sum(equilibrium) - 1)
    assert deviation < 1e-6, "Equilibrium frequencies of nucleotides must sum to 1.0"
    return pyvolve.Model("nucleotide", {"mu": rates, "state_freqs": equilibrium})
def execute(tree, model, length, out, numSim):
    """Simulate ``numSim`` alignments along a tree file under a named model.

    Args:
        tree: path to a Newick tree file.
        model: Pyvolve model name; also embedded in each output file name.
        length: partition size (converted with ``int``).
        out: output prefix; files are named ``<out>.<model>-<i>.fa``.
        numSim: number of alignments to write (converted with ``int``).
    """
    # read in model, tree, and define partition
    pyvolveModel = pyvolve.Model(model)
    pyvolveTree = pyvolve.read_tree(file=tree)
    pyvolvePartition = pyvolve.Partition(models=pyvolveModel, size=int(length))

    # create evolver
    my_evolver = pyvolve.Evolver(tree=pyvolveTree, partitions=pyvolvePartition)
    # Initial run with no arguments, i.e. with Pyvolve's default outputs;
    # kept because removing it would change which files the function produces.
    my_evolver()

    print("Simulating sequences...")
    # create simulated sequences
    for i in range(int(numSim)):
        seqfile = str(out) + "." + str(model) + "-" + str(i) + ".fa"
        # Report the file that is actually written.
        print(seqfile)
        my_evolver(seqfile=seqfile)
def make_partition_model_set(vecs, kappa):
    """Create one custom Pyvolve model per fitness vector.

    Each vector is run through ``pyvolve.MutSel_Sanity`` together with
    ``kappa`` and a population size of 10000. The resulting parameter dict is
    given a ``"matrix"`` entry computed by ``KimuraMutSelMatrix`` and wrapped
    in a ``"custom"`` model named ``bp<index>``.

    Args:
        vecs: iterable of fitness vectors.
        kappa: kappa value shared by all models.

    Returns:
        List of ``pyvolve.Model`` objects, in the order of ``vecs``.
    """
    checked_params = []
    for fitness in vecs:
        sanity = pyvolve.MutSel_Sanity(
            "mutsel", {"fitness": fitness, "kappa": kappa, "popsize": 10000})
        checked_params.append(sanity())

    models = []
    for index, params in enumerate(checked_params):
        # The matrix is computed from the parameters before it is added to them.
        params.update({"matrix": KimuraMutSelMatrix("mutsel", params)()})
        models.append(pyvolve.Model("custom", params, name=("bp%d" % index)))
    return models
def generate_Q_matrix(eq_freq, omega, all_nsy_cdn_index, all_syn_cdn_index):
    """Return an ``ECMunrest`` rate matrix rescaled toward a target omega.

    The matrix of a Pyvolve ``ECMunrest`` model is taken, the ratio of
    ``get_total_Q`` over the nonsynonymous and synonymous index sets is
    measured, and the matrix is passed to ``rescale_substitution_matrix``
    with ``scaling_factor = omega / ratio`` on the nonsynonymous indices.

    Args:
        eq_freq: equilibrium frequencies, forwarded as ``state_freqs``.
        omega: target ratio.
        all_nsy_cdn_index: index set of nonsynonymous codon pairs.
        all_syn_cdn_index: index set of synonymous codon pairs.

    Returns:
        The rescaled matrix.
    """
    # Combined index array; it is not used further in this function.
    all_cdn_index = numpy.concatenate([all_syn_cdn_index, all_nsy_cdn_index])

    # background_omega have to be readjusted.
    model_params = dict(omega=omega, k_ti=1, k_tv=1)
    # NOTE(review): state_freqs is passed as a keyword rather than inside the
    # parameters dict; confirm that Pyvolve honours it in this position.
    placeholder = pyvolve.Model(model_type='ECMunrest',
                                name='placeholder',
                                parameters=model_params,
                                state_freqs=eq_freq)
    unscaled = placeholder.matrix

    nonsyn_total = get_total_Q(unscaled, all_nsy_cdn_index)
    syn_total = get_total_Q(unscaled, all_syn_cdn_index)
    dnds = nonsyn_total / syn_total
    return rescale_substitution_matrix(unscaled,
                                       all_nsy_cdn_index,
                                       scaling_factor=omega / dnds)
def exampleFastaGenerator(nwkFile, fastaOutputLocation, seqLength, rate=1):
    """Simulate a nucleotide alignment along the tree in ``nwkFile``.

    All six symmetric mutation rates are set to ``rate``. Only the sequence
    file is written; rate and info files are suppressed.

    Args:
        nwkFile: path to a Newick tree file. Paths without a ``/`` are
            accepted (the earlier, unused tree-name computation rejected them).
        fastaOutputLocation: path of the alignment to write.
        seqLength: number of positions to evolve.
        rate: value used for every mutation rate.
    """
    # Tree.
    phylogony = pyvolve.read_tree(file=nwkFile)

    # Rates.
    mutationRates = {
        pair: rate for pair in ("AC", "AG", "AT", "CG", "CT", "GT")
    }

    # Model.
    model = pyvolve.Model("nucleotide", {"mu": mutationRates})
    partition = pyvolve.Partition(models=model, size=seqLength)

    # Evolver.
    evolver = pyvolve.Evolver(partitions=[partition], tree=phylogony)
    evolver(seqfile=fastaOutputLocation, ratefile=None, infofile=None)
def evolve_nonconvergent_partition(g):
    """Simulate the non-convergent codon sites under the background matrix.

    A custom Pyvolve model is built from a shallow copy of
    ``g['background_Q']`` (with stdout/stderr suppressed while it is
    created) and evolved along ``g['background_tree']`` for
    ``num_simulated_site - num_convergent_site`` sites. Output goes to the
    ``tmp.csubst.simulate_nonconvergent*`` files in the working directory.

    Args:
        g: dict providing ``num_simulated_site``, ``num_convergent_site``,
            ``background_Q`` and ``background_tree``.
    """
    total_sites = g['num_simulated_site']
    convergent_sites = g['num_convergent_site']

    # NOTE(review): with convergent sites present the reported start is
    # total - convergent + 1, which spans `convergent` sites rather than the
    # number simulated below; confirm the intended range for this message.
    first_site = 1 if convergent_sites == 0 else total_sites - convergent_sites + 1
    print('Codon site {}-{}; Non-convergent codons'.format(
        first_site, total_sites))

    background_q = copy.copy(g['background_Q'])
    with suppress_stdout_stderr():
        model = pyvolve.Model(model_type='custom',
                              name='root',
                              parameters={'matrix': background_q})

    partition = pyvolve.Partition(models=model,
                                  size=total_sites - convergent_sites)
    evolver = pyvolve.Evolver(partitions=partition, tree=g['background_tree'])
    evolver(ratefile='tmp.csubst.simulate_nonconvergent_ratefile.txt',
            infofile='tmp.csubst.simulate_nonconvergent_infofile.txt',
            seqfile='tmp.csubst.simulate_nonconvergent.fa',
            write_anc=False)
def generateTree(tns, ntaxa, seqlen):
    """Simulate a pure-birth tree and evolve nucleotide sequences along it.

    The tree is generated with DendroPy (birth rate 1.0, death rate 0),
    passed to Pyvolve through a temporary Newick file, and evolved under the
    default nucleotide model. The resulting sequences are written to
    ``evolvedsequences.fasta`` in the working directory.

    Args:
        tns: DendroPy taxon namespace shared by the tree and the sequences.
        ntaxa: number of extant tips.
        seqlen: number of positions to evolve.

    Returns:
        The simulated DendroPy tree.
    """
    import os
    import tempfile

    # Construct the tree
    t = dendropy.simulate.treesim.birth_death_tree(birth_rate=1.0,
                                                   death_rate=0,
                                                   taxon_namespace=tns,
                                                   num_extant_tips=ntaxa)

    # A unique scratch file avoids clashes between concurrent runs and the
    # hard-coded /tmp path; it is removed once Pyvolve has read the tree.
    handle, newick_path = tempfile.mkstemp(prefix='pyvt', suffix='.nwk')
    os.close(handle)
    try:
        t.write(path=newick_path,
                schema='newick',
                suppress_rooting=True,
                suppress_internal_node_labels=True)
        # Read tree from dendropy
        pot = pyvolve.read_tree(file=newick_path)
    finally:
        os.remove(newick_path)

    # Set pyvolve data type
    m1 = pyvolve.Model("nucleotide")
    p1 = pyvolve.Partition(models=m1, size=seqlen)

    # Simulate evolution with no save file
    e1 = pyvolve.Evolver(tree=pot, partitions=p1)
    e1(seqfile=None)
    seqs = e1.get_sequences()

    ds = dendropy.DnaCharacterMatrix.from_dict(seqs, taxon_namespace=tns)
    ds.write(path="evolvedsequences.fasta", schema="fasta")
    return t
def simulate(tree_index, length):
    """Simulate codon sequences along one of three stored trees.

    Args:
        tree_index: index into ("alpha", "beta", "charlie"); the tree is read
            from ``trees/<name>.tre``.
        length: partition size handed to Pyvolve.

    Returns:
        List of the simulated sequences, one per entry returned by
        ``get_sequences()``.
    """
    tree_name = ["alpha", "beta", "charlie"][tree_index]
    phylogeny = pyvolve.read_tree(file="trees/" + tree_name + ".tre")

    # MG-style codon model with dN/dS = 0.65 / 0.98.
    rate_params = {"beta": 0.65, "alpha": 0.98}
    codon_model = pyvolve.Model("MG", rate_params)

    # The partition size is the number of positions to evolve.
    partition = pyvolve.Partition(models=codon_model, size=length)

    evolver = pyvolve.Evolver(partitions=partition,
                              tree=phylogeny,
                              ratefile=None,
                              infofile=None)
    evolver(ratefile=None, infofile=None)

    sequences_by_name = evolver.get_sequences()
    return list(sequences_by_name.values())
"""
@author: david
"""
import pyvolve

# User-defined parameters
mut_rate = 0.005
freqs = [0.25, 0.25, 0.25, 0.25]
seq_length = 1000
kappa = 2.75

# Read in the phylogeny along which Pyvolve should simulate.
# scale_tree sets the absolute mutation rate.
my_tree = pyvolve.read_tree(file="AMR-sim.tre", scale_tree=mut_rate)
# pyvolve.print_tree(my_tree)  # Print the parsed phylogeny

# Nucleotide substitution model. Custom rates could be given through "mu":
#   custom_mu = {"AC":0.5, "AG":0.25, "AT":1.23, "CG":0.55, "CT":1.22, "GT":0.47}
#   nuc_model = pyvolve.Model("nucleotide", {"mu": custom_mu, "state_freqs": freqs})
# Here an HKY-style model with kappa is used instead.
nuc_model = pyvolve.Model("nucleotide", {"kappa": kappa, "state_freqs": freqs})

# Partition that evolves seq_length positions under nuc_model.
# A root sequence can be given instead of a size:
#   my_partition = pyvolve.Partition(models=nuc_model, root_sequence="GATAGAAC")
my_partition = pyvolve.Partition(models=nuc_model, size=seq_length)

# Evolver for the single partition.
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)

# Evolve sequences, writing to custom file names.
my_evolver(ratefile="AMR_ratefile.txt",
           infofile="AMR_infofile.txt",
           seqfile="AMR-seqsim.fasta")
def main():
    """Main body of script."""
    # One Genetics instance supplies every lookup table used below.
    genetics = pyvolve.genetics.Genetics()
    codons = genetics.codons
    codon_dict = genetics.codon_dict
    purines = genetics.purines

    args = vars(ParseArguments().parse_args())
    print("Read the following command line arguments:")
    print("\n\t{0}".format("\n\t".join(
        ["{0} = {1}".format(key, value) for (key, value) in args.items()])))

    print("\nPerforming simulation with pyvolve version {0}".format(
        pyvolve.__version__))

    print("\nReading model params from {0}".format(args['modelparams']))
    params = ReadParams(args['modelparams'])
    for (param, paramvalue) in params.items():
        print("The value of {0} is {1}".format(param, paramvalue))

    print("\nReading preferences from {0}".format(args['prefs']))
    tup = dms_tools.file_io.ReadPreferences(args['prefs'])
    (sites, pis) = (tup[0], tup[2])
    print("\nRead amino-acid preferences for {0} sites".format(len(pis)))

    tree = pyvolve.read_tree(file=args['tree'])

    # create models for simulation: one single-site partition per site
    partitions = []
    for r in sites:
        # Sites listed in either diversifying set get that set's omega.
        if params['diversifyingsitesA'] and (int(r) in params['diversifyingsitesA']):
            omega = params['diversifyingomegaA']
            # str.format prints the same text on Python 2 and Python 3.
            print("{0} {1}".format(r, omega))
        elif params['diversifyingsitesB'] and (
                int(r) in params['diversifyingsitesB']):
            omega = params['diversifyingomegaB']
            print("{0} {1}".format(r, omega))
        else:
            omega = 1.0

        matrix = []  # matrix[x][y] is rate of substitution from x to y
        for (xi, x) in enumerate(codons):
            row = []
            for (yi, y) in enumerate(codons):
                ntdiffs = [(x[j], y[j]) for j in range(3) if x[j] != y[j]]
                if len(ntdiffs) == 0:
                    assert x == y
                    row.append(0)  # will later be adjusted to make row sum to zero
                elif len(ntdiffs) > 1:
                    # multi-nucleotide codon change
                    row.append(0)
                else:
                    # single nucleotide change
                    (xnt, ynt) = ntdiffs[0]
                    if (xnt in purines) == (ynt in purines):
                        # transition
                        qxy = params['kappa'] * params['phi{0}'.format(ynt)]
                    else:
                        # transversion
                        qxy = params['phi{0}'.format(ynt)]
                    (xaa, yaa) = (codon_dict[x], codon_dict[y])
                    if xaa == yaa:
                        fxy = 1.0
                    else:
                        pix = pis[r][xaa]**params['stringencyparameter']
                        piy = pis[r][yaa]**params['stringencyparameter']
                        if abs(pix - piy) < 1e-6:
                            # Nearly equal preferences: use omega directly
                            # instead of the log-ratio expression below.
                            fxy = omega
                        else:
                            fxy = omega * math.log(
                                piy / pix) / (1.0 - pix / piy)
                    row.append(qxy * fxy * params['scalerate'])
            assert len(row) == len(codons)
            row[xi] = -sum(row)
            matrix.append(row)
        model = pyvolve.Model("custom", {"matrix": matrix})
        partitions.append(pyvolve.Partition(models=model, size=1))

    print("\nSimulating evolution, writing to {0}...".format(
        args['simulatedalignment']))
    basename = os.path.splitext(args['simulatedalignment'])[0]
    evolver = pyvolve.Evolver(partitions=partitions, tree=tree)
    evolver(
        seqfile=args['simulatedalignment'],
        infofile='{0}_infofile.txt'.format(basename),
        ratefile='{0}_ratefile.txt'.format(basename),
    )
    print("Finished simulation")

    # Keep only the first record of each distinct sequence and overwrite
    # the alignment with the de-duplicated set.
    uniqueseqs = set()
    uniquealignment = []
    ninitial = 0
    for seq in Bio.SeqIO.parse(args['simulatedalignment'], 'fasta'):
        ninitial += 1
        seqstr = str(seq.seq)
        if seqstr not in uniqueseqs:
            uniqueseqs.add(seqstr)
            uniquealignment.append(seq)
    print(
        "\nAfter removing redundant sequences, we have shrunk {0} from {1} to {2} sequences"
        .format(args['simulatedalignment'], ninitial, len(uniquealignment)))
    Bio.SeqIO.write(uniquealignment, args['simulatedalignment'], 'fasta')
def get_aminoacid_model(self):
    """Return the Pyvolve model named by ``self.parameters['AA_MODEL']``."""
    model_factory = pyvolve.Model
    model_name = self.parameters['AA_MODEL']
    return model_factory(model_name)
def get_codon_model(self):
    """Return the configured codon model with neutral scaling enabled.

    ``ALPHA``, ``BETA`` and ``KAPPA`` are read from ``self.parameters``,
    converted to floats and passed under lower-case keys; the model type is
    ``self.parameters['CODON_MODEL']``.

    Raises:
        KeyError: if one of the parameters is missing.
        ValueError: if ALPHA, BETA or KAPPA is not a valid float.
    """
    settings = self.parameters
    rate_params = {
        name.lower(): float(settings[name])
        for name in ("ALPHA", "BETA", "KAPPA")
    }
    return pyvolve.Model(settings['CODON_MODEL'],
                         rate_params,
                         neutral_scaling=True)
# Example: simulate under a user-supplied rate matrix with user-supplied states.
import pyvolve
import numpy as np

# Phylogeny, read from a file containing a newick tree.
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# States of the custom model. This is a list because a state may, in theory,
# be written with more than one character.
code = ["0", "1", "2"]

# Rate matrix: square, with one row and one column per entry of `code`.
matrix = np.array([
    [-0.5, 0.25, 0.25],
    [0.25, -0.5, 0.25],
    [0.25, 0.25, -0.5],
])

my_model = pyvolve.Model("custom", {"matrix": matrix, "code": code})

# A single position is evolved along the tree.
my_partition = pyvolve.Partition(models=my_model, size=1)
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()
# Example: simulate under a simple codon model. Apart from dN/dS every model
# parameter keeps its default (equal mutation rates, equal equilibrium
# frequencies).
import pyvolve

# Phylogeny, read from a file containing a newick tree.
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# The first argument to pyvolve.Model selects the codon model style:
#   "GY" or "codon" - GY94-style (codon equilibrium frequencies in the matrix)
#   "MG"            - MG94-style (nucleotide equilibrium frequencies in the matrix)
# The second argument is a parameter dictionary that must define dN/dS, either
# as "omega" (the ratio itself) or as "beta" (dN) together with "alpha" (dS).
# Both dictionaries below are acceptable; only the second is used here.
parameters_omega = {"omega": 0.65}
parameters_alpha_beta = {
    "beta": 0.65,
    "alpha": 0.98,
}  # dN/dS = 0.65 / 0.98
my_model = pyvolve.Model("MG", parameters_alpha_beta)

# Evolve 250 positions; for a codon alignment that is 250 codons,
# i.e. 750 nucleotide sites.
my_partition = pyvolve.Partition(models=my_model, size=250)

# Run the simulation.
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()
# Example: simulate under a simple nucleotide model with every parameter at
# its default: equal mutation rates and equal equilibrium frequencies
# (e.g. the JC69 model).
import pyvolve

# Phylogeny, read from a file containing a newick tree.
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# Default nucleotide model.
my_model = pyvolve.Model("nucleotide")

# Evolve 250 positions under that model.
my_partition = pyvolve.Partition(models=my_model, size=250)

# Run the simulation.
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()
# This example script demonstrates how to evolve according to a customized
# mutation-selection nucleotide model. Customizable parameters include mutation
# rates, and either equilibrium frequencies or fitness values.
# Note that, for MutSel models, mutation rates do not have to be symmetric, so
# you can provide different rates for A->C ("AC") and C->A ("CA").
import pyvolve

# Define a phylogeny, from a file containing a newick tree
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# Below are three example customized parameter dictionaries. Note that each of
# these could have "fitness" rather than "state_freqs" as a key.
nuc_freqs = [0.334, 0.12, 0.41, 0.136]

# For MutSel models, if you provide only 1 pair for each mutation rate (e.g.
# only AC and not CA), then Pyvolve will make mutation rates symmetric.
custom_mutation_sym = {
    "AC": 1.5,
    "AG": 2.5,
    "AT": 0.5,
    "CG": 0.8,
    "CT": 0.99,
    "GT": 1.56,
}

# Every ordered pair appears exactly once. The reverse of "GT" is "TG"; a
# second "TC" entry would silently overwrite the first one.
custom_mutation_asym = {
    "AC": 1.5, "CA": 0.8,
    "AG": 2.5, "GA": 1.2,
    "AT": 0.5, "TA": 1.1,
    "CG": 0.8, "GC": 0.9,
    "CT": 0.99, "TC": 2.3,
    "GT": 1.56, "TG": 2.56,
}

# Customize mutation rates using symmetric mutation rates, and specify
# frequencies for the MutSel model
parameters1 = {"state_freqs": nuc_freqs, "mu": custom_mutation_sym}

# Customize mutation rates using asymmetric mutation rates, and specify
# frequencies for the MutSel model
parameters2 = {"state_freqs": nuc_freqs, "mu": custom_mutation_asym}

# Customize mutation rates using kappa, and specify frequencies for the
# MutSel model
parameters3 = {"state_freqs": nuc_freqs, "kappa": 4.25}

my_model = pyvolve.Model("mutsel", parameters3)  # Any of the dictionaries shown above is acceptable!

# Assign the model to a pyvolve.Partition. The size argument indicates to
# evolve 250 positions
my_partition = pyvolve.Partition(models=my_model, size=250)

# Evolve!
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()
import pyvolve

# Define a phylogeny, from a file containing a newick tree
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# Define an amino-acid model, as a pyvolve.Model object. For this example,
# we'll use default parameters, but see the example script custom_aminoacid.py
# for other options.

# To implement rate heterogeneity, do either of these:
## 1) Custom rates: Provide a list of rate_factors when defining a Model
##    object. These rate factors will be assigned to sites with equal
##    probability by default. To change this, provide probabilities with the
##    argument `rate_probs`.
## 2) Gamma rates: Provide the keyword arguments num_categories and alpha when
##    defining a Model object. <num_categories> rates will be drawn from a
##    gamma distribution with shape and scale parameter each equal to <alpha>.
##    These rates will be equiprobable, unless overridden by `rate_probs`.

# Several model definitions are shown below (first argument can be a different
# model, as desired).

# custom rates
my_model1 = pyvolve.Model(
    "WAG", rate_factors=[0.3, 0.8, 1.5, 2.45])  # 25% of sites will have each factor.
my_model2 = pyvolve.Model(
    "WAG",
    rate_factors=[0.3, 0.8, 1.5, 2.45],
    rate_probs=[0.7, 0.2, 0.05, 0.05],
)  # 70% of sites evolve with rate of 0.3, 20% with a rate of 0.8, 5% with a rate of 1.5, and 5% with a rate of 2.45

# gamma rates
my_model3 = pyvolve.Model("WAG", alpha=0.6, num_categories=5)

# Assign the model to a pyvolve.Partition. The size argument indicates to
# evolve 250 positions
my_partition = pyvolve.Partition(models=my_model2, size=250)

# Evolve! Constructing the Evolver does not simulate anything on its own;
# calling it runs the simulation, as in the other example scripts.
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()
def init():
    """Import Pyvolve and build ``GC.pyvolve_model`` and ``GC.pyvolve_f``.

    ``GC.pyvolve_model`` is created from ``GC.pyvolve_model_type`` and, when
    non-empty, ``GC.pyvolve_custom_model_parameters_dictionary``.
    ``GC.pyvolve_f`` is the state-frequencies object selected by
    ``GC.pyvolve_state_frequencies_class`` and configured from
    ``GC.pyvolve_state_frequencies_parameters_dictionary`` (keys ``alphabet``,
    optional ``restrict``, and ``freq_dict`` for ``CustomFrequencies``).

    Raises:
        AssertionError: if Pyvolve cannot be imported, a setting fails
            validation, or no state-frequencies object could be created.
    """
    try:
        global pyvolve
        import pyvolve
    except Exception:
        from os import chdir
        chdir(GC.START_DIR)
        # Raised explicitly so the failure is not stripped under ``python -O``.
        raise AssertionError("Error loading Pyvolve. Install with: pip3 install pyvolve")

    # config validity checks
    GC.pyvolve_model_type = GC.pyvolve_model_type.strip()
    GC.pyvolve_state_frequencies_class = GC.pyvolve_state_frequencies_class.strip()
    freq_class = GC.pyvolve_state_frequencies_class
    assert freq_class in {
        "EqualFrequencies", "RandomFrequencies", "CustomFrequencies"
    }, 'Unsupported Pyvolve state_frequencies_class selected. Choose "EqualFrequencies", "RandomFrequencies", or "CustomFrequencies"'
    assert isinstance(
        GC.pyvolve_custom_model_parameters_dictionary, dict
    ), "Specified pyvolve_custom_model_parameters_dictionary is not a dictionary"
    freq_params = GC.pyvolve_state_frequencies_parameters_dictionary
    assert isinstance(
        freq_params, dict
    ), "Specified pyvolve_state_frequencies_parameters_dictionary is not a dictionary"
    assert "alphabet" in freq_params, 'Specified pyvolve_state_frequencies_parameters_dictionary does not contain mandatory "alphabet" key'
    assert freq_params["alphabet"] in {
        "nucleotide", "amino_acid", "codon"
    }, 'Specified pyvolve_state_frequencies_parameters_dictionary has an invalid value for "alphabet" (must be "nucleotide", "amino_acid", or "codon")'
    if freq_class == "CustomFrequencies":
        assert "freq_dict" in freq_params, 'Pyvolve CustomFrequencies class must have the "freq_dict" key in its pyvolve_state_frequencies_parameters_dictionary (and its value must be in the same format as the Pyvolve manual)'
        assert isinstance(
            freq_params["freq_dict"], dict
        ), 'Value of "freq_dict" in pyvolve_state_frequencies_parameters_dictionary is not a dictionary'

    # set up Pyvolve
    if GC.pyvolve_custom_model_parameters_dictionary == {}:
        GC.pyvolve_model = pyvolve.Model(GC.pyvolve_model_type)
    else:
        GC.pyvolve_model = pyvolve.Model(
            GC.pyvolve_model_type,
            GC.pyvolve_custom_model_parameters_dictionary)

    GC.pyvolve_f = None
    if "restrict" in freq_params:
        # Only Equal/Random frequencies are built with a restriction here. A
        # CustomFrequencies configuration that also has "restrict" leaves
        # pyvolve_f as None and is rejected by the final check below.
        if freq_class == "EqualFrequencies":
            GC.pyvolve_f = pyvolve.EqualFrequencies(
                freq_params["alphabet"], restrict=freq_params["restrict"])
        elif freq_class == "RandomFrequencies":
            GC.pyvolve_f = pyvolve.RandomFrequencies(
                freq_params["alphabet"], restrict=freq_params["restrict"])
    elif freq_class == "EqualFrequencies":
        GC.pyvolve_f = pyvolve.EqualFrequencies(freq_params["alphabet"])
    elif freq_class == "RandomFrequencies":
        GC.pyvolve_f = pyvolve.RandomFrequencies(freq_params["alphabet"])
    elif freq_class == "CustomFrequencies":
        GC.pyvolve_f = pyvolve.CustomFrequencies(
            freq_params["alphabet"], freq_dict=freq_params["freq_dict"])
    else:
        raise AssertionError("Invalid Pyvolve StateFrequencies class specified")
    assert GC.pyvolve_f is not None, "Something went wrong in setting up the Pyvolve StateFrequencies class"
# Example: simulate under a simple amino-acid model. The customizable model
# parameters keep their defaults: equal equilibrium frequencies.
import pyvolve

# Phylogeny, read from a file containing a newick tree.
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# Amino-acid model. The first argument names one of the empirical matrices
# available in Pyvolve: "JTT", "WAG", "LG", "AB", "mtmam", "mtREV24" or
# "DAYHOFF".
my_model = pyvolve.Model("LG")

# Evolve 250 positions under that model.
my_partition = pyvolve.Partition(models=my_model, size=250)

# Run the simulation.
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()
def pyvolvePartitions(model, divselection=None):
    """Build one single-site `pyvolve` partition per site of `model`.

    Args:
        `model` (`phydmslib.models.Models` object)
            Model to simulate under. `ExpCM`, `ExpCM_empirical_phi`,
            `ExpCM_empirical_phi_divpressure` and `YNGKP_M0` are handled;
            the type is matched exactly, not by inheritance.
        `divselection` (`None` or 2-tuple `(divomega, divsites)`)
            When given, nonsynonymous changes at the sites listed in
            `divsites` (1, 2, ... numbering) use `divomega` in place of
            `model.omega`.

    Returns:
        `partitions` (`list` of `pyvolve.Partition` objects)
            One partition of size 1 per site, each carrying a custom
            codon-by-codon rate matrix.

    Raises:
        ValueError: if `model` is of a type that is not handled.
    """
    genetics = pyvolve.genetics.Genetics()
    codons = genetics.codons
    codon_dict = genetics.codon_dict
    pyrims = genetics.pyrims
    purines = genetics.purines

    if divselection:
        (divomega, divsites) = divselection
    else:
        divsites = []
    assert all(1 <= site <= model.nsites for site in divsites)

    model_kind = type(model)
    expcm_kinds = [
        phydmslib.models.ExpCM,
        phydmslib.models.ExpCM_empirical_phi,
        phydmslib.models.ExpCM_empirical_phi_divpressure,
    ]
    ncodons = len(codons)

    partitions = []
    for r in range(model.nsites):
        matrix = scipy.zeros((ncodons, ncodons), dtype='float')
        for (xi, x) in enumerate(codons):
            for (yi, y) in enumerate(codons):
                ntdiffs = [(x[j], y[j]) for j in range(3) if x[j] != y[j]]
                # Only single-nucleotide changes get a non-zero rate.
                if len(ntdiffs) != 1:
                    continue
                (xnt, ynt) = ntdiffs[0]

                # Mutation term: kappa applies to transitions.
                qxy = 1.0
                if (xnt in purines) == (ynt in purines):
                    qxy *= model.kappa

                # Selection term: omega applies to nonsynonymous changes.
                (xaa, yaa) = (codon_dict[x], codon_dict[y])
                fxy = 1.0
                if xaa != yaa:
                    if model_kind == phydmslib.models.ExpCM_empirical_phi_divpressure:
                        fxy *= model.omega * (
                            1 + model.omega2 * model.deltar[r])
                    elif r + 1 in divsites:
                        fxy *= divomega
                    else:
                        fxy *= model.omega

                if model_kind in expcm_kinds:
                    qxy *= model.phi[NT_TO_INDEX[ynt]]
                    pix = model.pi[r][AA_TO_INDEX[xaa]]**model.beta
                    piy = model.pi[r][AA_TO_INDEX[yaa]]**model.beta
                    if abs(pix - piy) > ALMOST_ZERO:
                        fxy *= math.log(piy / pix) / (1.0 - pix / piy)
                elif model_kind == phydmslib.models.YNGKP_M0:
                    for p in range(3):
                        qxy *= model.phi[p][NT_TO_INDEX[y[p]]]
                else:
                    raise ValueError("Can't handle model type {0}".format(
                        type(model)))

                matrix[xi][yi] = model.mu * qxy * fxy
            # Diagonal entry makes the row sum to zero.
            matrix[xi][xi] = -matrix[xi].sum()

        # create model in way that captures annoying print statements in pyvolve
        saved_stdout = sys.stdout
        sys.stdout = open(os.devnull, 'w')
        try:
            site_model = pyvolve.Model("custom", {"matrix": matrix})
        finally:
            sys.stdout.close()
            sys.stdout = saved_stdout
        partitions.append(pyvolve.Partition(models=site_model, size=1))

    return partitions
def cli(gnumber, glist, gtree, edprob, gsize, glen_range, dnds, tau=None,
        delrate=0.0, from_al=None, protlike=False, no_syn=False,
        sub_rate=1.0, min_cons=0.0, outdir=""):
    """Simulate `gsize` codon genes on a genome tree and save edited copies.

    The genome set comes from the first truthy option among `gnumber`,
    `glist` and `gtree`. Each gene is simulated with a pyvolve "codon" model,
    optionally passed through `random_deletion`, optionally prefixed with
    'ATG', then handed to `CtoUsimulate` and written out with `save_data`.

    Args:
        gnumber: number of genomes; they are named 'Genome_1' ... 'Genome_N'.
        glist: path to a text file with one genome name per line. Blank lines
            and lines starting with '#' are skipped. A name written with a
            leading '-' or '_' is also added to the `no_edit` list given to
            `CtoUsimulate` (presumably genomes that must not be edited;
            confirm against `CtoUsimulate`).
        gtree: tree description passed to `Tree(...)`. Leaves whose name
            starts with '_' go to the `no_edit` list, and surrounding '_'
            are stripped from the leaf names.
        edprob: forwarded unchanged to `CtoUsimulate`.
        gsize: number of genes to simulate.
        glen_range: `(low, high)` pair passed to `np.random.randint`; the
            drawn value is multiplied by 3 to get the gene length passed to
            `simulate_genomes`.
        dnds: indexable pair; `dnds[0]` becomes the model's "beta" and
            `dnds[1]` its "alpha".
        tau: if truthy, used as the model's "kappa".
        delrate: if truthy, `random_deletion` is applied with this rate.
        from_al: if truthy, alignment file from which codon state
            frequencies are read with `pyvolve.ReadFrequencies`.
        protlike: if true, 'ATG' is prepended to every simulated sequence.
        no_syn: forwarded to `CtoUsimulate`.
        sub_rate: passed to `pyvolve.read_tree` as `scale_tree`.
        min_cons: forwarded to `CtoUsimulate`.
        outdir: forwarded to `simulate_genomes` and `save_data`.

    Raises:
        NotImplementedError: if none of `gnumber`, `glist`, `gtree` is given.
    """
    gleaf = []
    no_edit = []
    tree = None
    if gnumber:
        gleaf = ['Genome_{}'.format(i) for i in range(1, gnumber + 1)]
    elif glist:
        with open(glist) as handle:
            # Fixed: the loop used the undefined name `Glist` instead of the
            # opened file handle, so this branch always raised NameError.
            for line in handle:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                name = line.strip('-_')
                gleaf.append(name)
                if line.startswith(('-', '_')):
                    no_edit.append(name)
    elif gtree:
        tree = Tree(gtree)
        gleaf = tree.get_leaf_names()
        no_edit = [x.strip('_') for x in gleaf if x.startswith('_')]
        for node in tree:
            node.name = node.name.strip('_')
    else:
        raise NotImplementedError(
            "One of --gnumber, --glist and --gtree is needed !")

    if not tree:
        # No tree supplied: build a random one over the requested genomes.
        tree = Tree()
        tree.populate(len(gleaf), names_library=gleaf, random_branches=True)

    param_list = {"alpha": dnds[1], "beta": dnds[0]}
    if tau:
        param_list["kappa"] = tau
    if from_al:
        # read codons frequencies from an existing alignment
        f = pyvolve.ReadFrequencies("codon", file=from_al)
        param_list['state_freqs'] = f.compute_frequencies()

    phylogeny = pyvolve.read_tree(tree=tree.write(format=5),
                                  scale_tree=sub_rate)
    codon_model = pyvolve.Model("codon", param_list)

    # add height to tree
    tree = add_height_to_tree(tree)
    for i in range(gsize):
        # gene length (in codons) is drawn from a uniform distribution
        alen = np.random.randint(glen_range[0], glen_range[1]) * 3
        seq = simulate_genomes(codon_model, phylogeny, alen, outdir, i + 1)
        if delrate:
            seq = random_deletion(seq, tree, alen // 3, delrate)
        if protlike:
            for k in seq:
                seq[k] = 'ATG' + seq[k]
        edited_seq, truth_table = CtoUsimulate(seq, tree, no_edit, edprob,
                                               no_syn=no_syn,
                                               min_cons=min_cons)
        # Each gene is written out as soon as it has been simulated.
        save_data(tree, seq, edited_seq, truth_table, outdir, i + 1)
# Example script: evolve sequences under a simple codon mutation-selection
# (MutSel) model.
# A MutSel model needs either fitness values or equilibrium frequencies.
# Mutation rates are left at their default (equal) values.

import pyvolve
import numpy as np  # only needed to generate the example MutSel parameters

# Read the phylogeny from a file containing a newick tree.
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# A mutation-selection model is requested with "MutSel" as the first argument.
# Such a model requires a parameters dictionary holding either a list of
# *fitness* values or a list of *equilibrium frequencies*. See the user manual
# for more information. The three dictionaries below are all acceptable.

# Numpy array of length 61: one fitness value per codon.
parameters_fitness1 = {"fitness": np.random.uniform(low=-5, high=5, size=61)}

# Numpy array of length 20: one fitness value per amino acid. These are
# applied to codons such that synonymous codons have the same fitness.
parameters_fitness2 = {"fitness": np.random.uniform(low=-5, high=5, size=20)}

# Numpy array of equal frequencies, just as an example. It must sum to 1.
parameters_freqs = {"state_freqs": np.full(61, 1. / 61)}

# Any of the parameters dictionaries above may be used as the second argument.
my_model = pyvolve.Model("MutSel", parameters_fitness1)

# Attach the model to a pyvolve.Partition; size=250 means 250 codon positions
# are evolved.
my_partition = pyvolve.Partition(size=250, models=my_model)

# Evolve!
my_evolver = pyvolve.Evolver(tree=my_tree, partitions=my_partition)
my_evolver()