コード例 #1
0
ファイル: lab2_N.py プロジェクト: volkovalexeytsu/IS_LAB2

#Одноточечный кроссинговер
def crossover(parent_1, parent_2):
    crossover_index = random.randrange(1, len(parent_1))
    child_1a = parent_1[:crossover_index]
    child_1b = [i for i in parent_2 if i not in child_1a]
    child_1 = child_1a + child_1b

    child_2a = parent_2[crossover_index:]
    child_2b = [i for i in parent_1 if i not in child_2a]
    child_2 = child_2a + child_2b
    return child_1, child_2


ga.crossover_function = crossover


#Целочисленная мутация
def mutate(individual):
    mutate_index1 = random.randrange(len(individual))
    mutate_index2 = random.randrange(len(individual))
    individual[mutate_index1], individual[mutate_index2] = individual[
        mutate_index2], individual[mutate_index1]


ga.mutate_function = mutate


#Рулеточная селекция
def selection(population):
コード例 #2
0
def generate(target_params, insert_aa_seq, population_size=100, mutation_probability=0.3, crossover_probability=0.8, max_gens_since_improvement=50, genetic_code=11, verbose=False):
    '''Generate a sequence matching :math:`k`-mer usage.

	Args:
		target_params (dict): The parameters to optimize towards. Should be of the format {:math:`k_n`: {:math:`k_{n1}`: 0.2, :math:`k_{n2}`: 0.3,...}...}
		insert_aa_seq (str): The amino acid sequence for the optimized sequence.
		population_size (int, optional): The size of the population for the genetic algorithm. Defaults to 100.
		mutation_probability (float, optional): The likelihood of changing each member of each generation. Defaults to 0.3.
		crossover_probability (float, optional): The likelihood of each member of the population undergoing crossover. Defaults to 0.8.
		max_gens_since_improvement (int, optional): The number of generations of no improvement after which to stop optimization. Defaults to 50.
		genetic_code (int, optional): The genetic code to use. Defaults to 11, the standard genetic code.
		verbose (bool, optional): Whether to print the generation number, generations since improvement, and fitness. Defaults to false.

	Returns:
		str: The generated sequence.
	'''
    # back translate to an initial seq
    insert = ""
    for aa in insert_aa_seq:
        try:
            insert += Bio.Data.CodonTable.unambiguous_dna_by_id[genetic_code].back_table[aa]
        except:
            if aa == "*":
                insert += Bio.Data.CodonTable.unambiguous_dna_by_id[genetic_code].back_table[None]

    # create the genetic algorithm instance
    ga = GeneticAlgorithm(dna_to_vector(insert),
                          crossover_probability=crossover_probability,
                          maximise_fitness=False,
                          population_size=population_size,
                          mutation_probability=mutation_probability)

    # get the target values of k
    k = list(target_params.keys())

    # generate the target vector from the input dict
    target = np.array([])
    for _k in sorted([x for x in k if x != "codons"]):
        target = np.concatenate((target, [x[1] for x in sorted(target_params[_k].items(), key=lambda x: x[0])]))
    if "codons" in k:
        target = np.concatenate((target, [x[1] for x in sorted(target_params["codons"].items(), key=lambda x: x[0])]))

    def vector(seq):
        output = k_mer_frequencies(seq, [x for x in k if x != "codons"], include_missing=True, vector=True)
        if "codons" in k:
            output = np.concatenate((output, [x[1] for x in sorted(codon_frequencies(seq, genetic_code).items(), key=lambda x: x[0])]))
        return output

    def fitness(individual, data):
        individual = vector_to_dna(individual)
        # fitness = np.linalg.norm(target - vector(individual))
        fitness = jensen_shannon_divergence([dit.ScalarDistribution(target), dit.ScalarDistribution(vector(individual))])
        return fitness
    ga.fitness_function = fitness

    synonymous_codons = _synonymous_codons(genetic_codes[genetic_code])
    def mutate(individual):
        while True:
            # choose a random codon
            codon_idx = np.random.randint(len(individual) / 6) * 6

            # figure out which codon it is
            codon = vector_to_dna(individual[codon_idx:codon_idx+6])

            # ensure that mutations actually change the sequence
            if len(synonymous_codons[codon]) != 1:
                break

        # choose a new one at random for the AA
        new_codon = dna_to_vector(np.random.choice([x for x in synonymous_codons[codon] if x != codon]))

        # replace it in the individual
        individual[codon_idx:codon_idx+6] = new_codon

        return individual
    ga.mutate_function = mutate

    def crossover(parent_1, parent_2):
        parent_1, parent_2 = list(parent_1), list(parent_2)
        index = random.randrange(1, len(parent_1) / 6) * 6
        child_1 = parent_1[:index] + parent_2[index:]
        child_2 = parent_2[:index] + parent_1[index:]
        return child_1, child_2
    ga.crossover_function = crossover

    def create_individual(seed_data):
        individual = vector_to_dna(seed_data)
        new = ""
        for codon in [individual[i:i+3] for i in range(0, len(individual), 3)]:
            if len(synonymous_codons[codon]) == 1:
                new += codon
                continue
            new += np.random.choice([x for x in synonymous_codons[codon] if x != codon])

        return dna_to_vector(new)
    ga.create_individual = create_individual

    # set up for GA run
    ga.create_first_generation()
    gens_since_improvement = 0
    best_indv_fitness = ga.best_individual()[0]
    counter = 1

    # run the GA
    try:
	    while gens_since_improvement < max_gens_since_improvement:
	        ga.create_next_generation()
	        if ga.best_individual()[0] < best_indv_fitness:
	            best_indv_fitness = ga.best_individual()[0]
	            gens_since_improvement = 0
	        else:
	            gens_since_improvement += 1
	        if verbose:
	            print("Gen: %s\tSince Improvement: %s/%s\tFitness: %s".expandtabs(15) % (counter, gens_since_improvement, max_gens_since_improvement, ga.best_individual()[0]), end="\r")
	        counter += 1
    except KeyboardInterrupt:
        print("\nStopping early...")

    if verbose: print()

    best_seq = vector_to_dna(ga.best_individual()[1])
    best_freqs = vector(best_seq)
    assert Seq(best_seq).translate(genetic_code) == Seq(insert).translate(genetic_code)
    return best_seq