def reproduce(female_pop, male_pop, male_deg_l, male_fit, sort_by_degree):
    """Mate every "female" gene with a fitness-weighted random "male" gene.

    "Female" and "male" are not biological sexes: they are just two
    populations (possibly the same one) that get mated together.  The female
    population is the one that is looped over; each of its individuals gets
    two children.  Reproduction consists of a random number (1, 2 or 3) of
    successive single-point crossovers, see
    https://www.tutorialspoint.com/genetic_algorithms/genetic_algorithms_crossover.htm

    Parents are not removed, so at this point the population triples.  It is
    meant to be reduced again in ``selection()``.

    Args:
        female_pop: List of genes (sliceable, concatenable sequences).  It is
            extended in place with the children.
        male_pop: Genes the fathers are drawn from.
        male_deg_l: Polynomial degree produced by each gene of ``male_pop``.
        male_fit: Fitness of each gene of ``male_pop``; used as the selection
            weight after normalisation.
        sort_by_degree: ``1`` for intra-degree reproduction (the father is
            drawn among genes of the same degree), anything else for
            inter-degree reproduction (the father is drawn from the whole
            male population).

    Returns:
        ``female_pop`` itself, now holding parents followed by the children.
    """
    children = []

    # Inter-degree weights do not depend on the mother: normalise them once.
    all_weights = None
    if sort_by_degree != 1 and female_pop:
        total_fit = sum(male_fit)
        all_weights = [fit / total_fit for fit in male_fit]

    for i, mother in enumerate(female_pop):
        if sort_by_degree == 1:
            # Case of intra-degree reproduction.
            # NOTE(review): the reference degree is read from male_deg_l at
            # the mother's index, which is only meaningful when both
            # populations are aligned (e.g. the same population) - confirm
            # with callers.
            degree = male_deg_l[i]
            candidates = [j for j, deg in enumerate(male_deg_l) if deg == degree]
            candidate_fit = [male_fit[j] for j in candidates]
            candidate_total = sum(candidate_fit)
            weights = [fit / candidate_total for fit in candidate_fit]
            d = male_pop[choose(candidates, p=weights)][:]  # father code
        else:
            # Case of inter-degree reproduction.
            d = male_pop[choose(len(male_pop), p=all_weights)][:]

        c = mother[:]  # mother code

        # Randomly chosen number of successive crossover operations.
        for _ in range(choice([1, 2, 3])):
            limit = min(len(c), len(d))
            if limit < 2:
                break  # no interior cut point exists for such short genes
            cut = choice(range(1, limit))
            cut = choice([-cut, cut])  # cut counted from the start or the end
            # Swap both tails simultaneously.  Assigning c first and then
            # building d from the *new* c would leave d unchanged.
            c, d = c[:cut] + d[cut:], d[:cut] + c[cut:]

        children.extend([c, d])

    female_pop.extend(children)
    return female_pop
def create_fake_profile(self, n_profile, verbose = False, ssn_sep_change = True):
    """Generate ``n_profile`` fake PII records and store them on the instance.

    The records are kept column-wise in ``self.fake_profiles``, a dict with
    the keys ``"Name"``, ``"Address"``, ``"SSN"``, ``"Email"``, ``"Plates"``,
    ``"CreditCardNumber"`` and ``"Phone_number"``, each mapping to a list of
    ``n_profile`` values produced by ``self.faker``.

    Args:
        n_profile: Number of profiles to generate; must be an ``int``.
        verbose: When true, the generated dictionary is also returned.
        ssn_sep_change: When true, the SSN list is passed through
            ``_random_sep_change`` so that part of the SSNs use another
            separator.

    Returns:
        ``self.fake_profiles`` if ``verbose`` is true, otherwise ``None``.

    Raises:
        AssertionError: If ``n_profile`` is not an integer.
    """
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, which leaked the continuation into the message.
    assert isinstance(n_profile, int), (
        "Please enter an integer "
        "for the number of generated profiles."
    )
    self.n_profile = n_profile
    count = self.n_profile
    faker = self.faker

    fake_profiles = {}
    # Use faker to generate either a full name, a last name or a first name.
    fake_profiles["Name"] = [
        choose([faker.name(), faker.last_name(), faker.first_name()])
        for _ in range(count)
    ]
    # Use faker to generate either a full, a street or a secondary address.
    fake_profiles["Address"] = [
        choose([
            faker.address(),
            faker.street_address(),
            faker.secondary_address(),
        ])
        for _ in range(count)
    ]
    fake_profiles["SSN"] = [faker.ssn() for _ in range(count)]
    fake_profiles["Email"] = [faker.email() for _ in range(count)]
    fake_profiles["Plates"] = [faker.license_plate() for _ in range(count)]
    fake_profiles["CreditCardNumber"] = [
        faker.credit_card_number() for _ in range(count)
    ]
    fake_profiles["Phone_number"] = [faker.phone_number() for _ in range(count)]

    # Change the separator in part of the SSN data.
    if ssn_sep_change:
        fake_profiles["SSN"] = _random_sep_change(fake_profiles["SSN"])

    # Change the separator in the Address data from "\n" to " ".
    fake_profiles['Address'] = [
        sep_change(each_address, init_sep = "\n", after_sep = " ")
        for each_address in fake_profiles['Address']
    ]

    self.fake_profiles = fake_profiles
    print("Finished creating fake profiles.")

    if verbose:
        return self.fake_profiles
    return None
def generate_game_states():
    """Set up the reward constants and randomly pick the starting side.

    NOTE(review): only the opening of this function is visible here; the
    reward constants and ``starting_side`` are not used by the visible
    statements and nothing is returned yet.
    """
    # rewards
    win = 10
    tie = 5
    loss = 0

    # Randomly choose which side starts.  Neither the standard-library
    # ``random`` module nor ``numpy.random`` provides ``choose``; ``choice``
    # is the function that picks one element of a sequence in both.
    starting_side = random.choice([-1, 1])
def _random_pii_insert(self):
    """Drop one random PII value into a random token slot of every text.

    For each label in ``self.pii_labels`` a value is drawn from
    ``self.fake_profiles[label]`` and inserted between the space-separated
    tokens of the matching entry of ``self._init_fake_text_no_pii``.  The
    combined text is written to ``self.pii_with_text`` and the inserted
    value to ``self.PII``, both at the same index as the label.
    """
    for idx, label in enumerate(tqdm(self.pii_labels)):
        # Draw the PII value matching this label's PII type.
        value = choose(self.fake_profiles[label])

        tokens = self._init_fake_text_no_pii[idx].split(" ")

        # Any gap is eligible, including before the first and after the
        # last token, hence len(tokens) + 1 possible slots.
        slot = choose(range(len(tokens) + 1))
        mixed_tokens = tokens[:slot] + [value] + tokens[slot:]

        self.pii_with_text[idx] = " ".join(mixed_tokens)
        self.PII[idx] = value
def mutation(genes, rate=2):
    """Apply random point mutations to a list of gene strings, in place.

    Every base of every gene independently has a ``rate`` percent chance of
    triggering one mutation.  Each mutation then picks a random gene and a
    random position inside that gene, and replaces the base found there by
    a different one among ``A``, ``T``, ``G`` and ``C``.

    Args:
        genes: Mutable list of gene strings.  Genes may have different
            lengths; empty genes are never selected.
        rate: Per-base mutation probability, expressed in percent.

    Returns:
        The same ``genes`` list, for convenience (it is modified in place).
    """
    # Only non-empty genes offer a position to mutate.
    mutable = [idx for idx, gene in enumerate(genes) if gene]
    if not mutable:
        return genes

    # Count the real number of bases instead of assuming that every gene is
    # as long as the first one.
    bases_num = sum(len(gene) for gene in genes)
    prob = rate / 100
    # One vectorised draw: a 0/1 outcome per base, summed into a count.
    mut_num = int(choose([0, 1], size=bases_num, p=[1 - prob, prob]).sum())

    for _ in range(mut_num):
        gene = choice(mutable)
        # The position is drawn within the selected gene's own length.
        cod = choice(range(len(genes[gene])))
        current = genes[gene][cod]
        new = choice([base for base in 'ATGC' if base != current])
        genes[gene] = genes[gene][:cod] + new + genes[gene][cod + 1:]

    return genes
def _random_sep_change(data, init_sep = "-", after_sep = " ", percentage = 0.5 , seed = 7):
    """Change the separator of a random share of the entries of ``data``.

    Written for SSN data: ``int(len(data) * percentage)`` distinct entries
    are selected at random and rewritten with ``sep_change`` so that
    ``init_sep`` becomes ``after_sep``.

    Args:
        data: List of strings; it is modified in place.
        init_sep: Separator currently used in the entries.
        after_sep: Separator to use in the selected entries.
        percentage: Fraction of the entries to rewrite.  The resulting count
            is clamped to ``[0, len(data)]``.
        seed: Value handed to ``setseed`` before sampling.
            NOTE(review): ``setseed`` presumably reseeds the global NumPy
            generator, which would also affect later random draws made
            elsewhere - confirm.

    Returns:
        The same ``data`` list.
    """
    setseed(seed)

    n_replace = min(len(data), max(0, int(len(data) * percentage)))
    if n_replace == 0:
        return data

    # Sample WITHOUT replacement: with the default replace=True the same
    # index can be drawn several times, so fewer than the requested share
    # of entries would actually be changed.
    replacing_indexes = choose(len(data), n_replace, replace=False)

    for each_replacing_index in replacing_indexes:
        # Change this entry's separator from init_sep to after_sep.
        data[each_replacing_index] = sep_change(
            data[each_replacing_index], init_sep, after_sep
        )
    return data
def insert(gene):
    """Insert 1 to 4 random codons (3 to 12 random bases) into ``gene``.

    The run of new bases is placed in front of a randomly chosen existing
    base.  An empty gene simply becomes the inserted run.

    Args:
        gene: Gene string.

    Returns:
        A new string holding ``gene`` with the random bases inserted.
    """
    # Multiple of three; named `length` to avoid shadowing the builtin `set`.
    length = 3 * choice([1, 2, 3, 4])
    # An empty gene has no position to draw from: insert at index 0.
    loc = choice(range(len(gene))) if gene else 0
    ins = ''.join(choose(['A', 'T', 'G', 'C'], size=length))
    return gene[:loc] + ins + gene[loc:]
def mutate(pop, code, spreading, mut_rate=0.005):
    """Randomly mutate every gene of ``pop`` in place and return ``pop``.

    All the different kinds of random mutation are described at
    https://www.tutorialspoint.com/genetic_algorithms/genetic_algorithms_mutation.htm

    Mutations are possibly applied to each gene of the population: every
    base independently triggers one mutation with the effective mutation
    rate, then the kind of each mutation (flip, swap, scramble, inverse,
    insert or delete) is chosen at random.

    Args:
        pop: Mutable sequence of gene strings over the alphabet ``ATGC``.
        code: Lookup from a 3-base codon to its meaning.  A codon mapped to
            ``'stop'`` is trimmed when found at the start or at the end of
            a gene.
        spreading: Difference between the best and the worst distances.  The
            mutation rate increases as spreading becomes smaller, which
            helps the population keep evolving and finding new solutions
            when it becomes too homogeneous.
        mut_rate: Base mutation probability per base.  The default 0.005
            means a 5 in 1000 probability for each base.

    Returns:
        ``pop`` itself, with every gene replaced by its mutated version.
    """
    mut_rate = mut_rate + 0.008 / (1 + spreading)
    # Keep the value usable as a probability whatever the inputs are.
    mut_rate = min(1.0, max(0.0, mut_rate))

    def flip(gene):
        # Replace one base by a random base (possibly the same one).
        if not gene:
            return gene
        loc = choice(range(len(gene)))
        return gene[:loc] + choice('ATGC') + gene[loc + 1:]

    def swap(gene):
        # Exchange the bases found at two positions.
        if len(gene) < 2:
            return gene
        loc_a = choice(range(len(gene)))
        loc_b = choice(range(loc_a, len(gene)))
        if loc_a == loc_b:
            # Nothing to exchange; rebuilding the string here would
            # duplicate the base and lengthen the gene by one.
            return gene
        return (gene[:loc_a] + gene[loc_b] + gene[loc_a + 1:loc_b]
                + gene[loc_a] + gene[loc_b + 1:])

    def scramble(gene):
        # Shuffle a window spanning less than a tenth of the gene.
        if len(gene) < 10:
            return gene  # no window size can be drawn for such a gene
        span = choice(range(len(gene) // 10))
        loc = choice(range(len(gene) - span))
        window = gene[loc:loc + span]
        shuffled = ''.join(sample(list(window), len(window)))
        return gene[:loc] + shuffled + gene[loc + span:]

    def inverse(gene):
        # Reverse a window of 2 to 10 bases.
        span = choice(range(2, 11))
        if len(gene) <= span:
            return gene  # gene too short to host the window
        loc = choice(range(len(gene) - span))
        return gene[:loc] + gene[loc:loc + span][::-1] + gene[loc + span:]

    def insert(gene):
        # Insert 1 to 4 random codons in front of a random base.
        span = 3 * choice(range(1, 5))
        loc = choice(range(len(gene))) if gene else 0
        ins = ''.join(choose(['A', 'T', 'G', 'C'], size=span))
        return gene[:loc] + ins + gene[loc:]

    def delete(gene):
        # Remove 1 to 4 consecutive codons.
        span = 3 * choice(range(1, 5))
        if len(gene) <= span:
            return gene  # gene too short to lose that many bases
        loc = choice(range(len(gene) - span))
        return gene[:loc] + gene[loc + span:]

    operators = (flip, swap, scramble, inverse, insert, delete)

    for idx, gene in enumerate(pop):
        draws = choose([0, 1], size=len(gene), p=[1 - mut_rate, mut_rate])
        mut = int(draws.sum())
        # A throwaway loop variable keeps `idx` intact: reusing the gene
        # index here would store the result in the wrong slot of `pop`.
        for _ in range(mut):
            # Pick one operator and apply only that one, instead of running
            # all six and discarding five results.
            gene = choice(operators)(gene)
        # NOTE(review): stop codons are trimmed once per gene, after all of
        # its mutations; the original nesting of these checks was not
        # recoverable from the collapsed source - confirm.
        if len(gene) >= 3 and code[gene[:3]] == 'stop':
            gene = gene[3:]
        if len(gene) >= 3 and code[gene[-3:]] == 'stop':
            gene = gene[:-3]
        pop[idx] = gene
    return pop
import pandas as pd
import numpy as np
from numpy.random import choice as choose

# Categorical values each random animal record is drawn from.
species = ['dog', 'cat']
colors = ['black', 'red']
habitats = ['city', 'country']

# Total number of records; they are generated in pairs.
N = 40

animals = []
for i in range(N // 2):
    # First record of the pair: its nested 'size' dict has 'legs' and 'head'.
    legs_and_head = {
        'specy': choose(species),
        'color': choose(colors),
        'habitat': choose(habitats),
        'size': {'legs': np.random.rand(), 'head': np.random.rand()},
    }
    animals.append(legs_and_head)

    # Second record of the pair: its nested 'size' dict only has 'paw'.
    paw_only = {
        'specy': choose(species),
        'color': choose(colors),
        'habitat': choose(habitats),
        'size': {'paw': np.random.rand()},
    }
    animals.append(paw_only)

# One row per record; the 'size' column still holds the nested dicts.
df = pd.DataFrame.from_records(
    animals, columns=('specy', 'color', 'habitat', 'size'))

# Expand the nested dicts into their own columns and join them back in
# place of the original 'size' column.
size_columns = pd.DataFrame(df['size'].values.tolist())
df2 = df.drop('size', axis=1).join(size_columns)