def mutateAction(self, pActAtom, atomics, allTeams, parentTeam, multiActs,
                 pSwapMultiAct, pChangeMultiAct):
    """Replace or perturb this learner's action in place.

    When ``flip(pActAtom)`` succeeds the action becomes (or stays) atomic,
    otherwise it becomes a team taken from ``allTeams``.

    Parameters:
        pActAtom: probability handed to ``flip`` to choose the atomic branch.
        atomics: candidate atomic actions, used when ``multiActs`` is None.
        allTeams: candidate teams for the team branch.
        parentTeam: team that is never selected as the new action.
        multiActs: optional collection of multi-valued actions; when given,
            the action is a list of numbers kept within [0, 1].
        pSwapMultiAct: probability of swapping the whole multi-action.
        pChangeMultiAct: probability of (repeatedly) nudging one value.

    If no alternative atomic action or no eligible team exists, the current
    action is kept instead of raising ``IndexError`` from ``random.choice``.
    ``numLearnersReferencing`` stays balanced either way because the old
    reference is released up front and re-added at the end whenever the
    final action is not atomic.
    """
    if not self.isActionAtomic():
        # release the reference held on the old team action
        self.action.numLearnersReferencing -= 1

    if flip(pActAtom):
        # atomic action
        if multiActs is None:
            candidates = [a for a in atomics if a is not self.action]
            if candidates:
                self.action = random.choice(candidates)
        else:
            swap = flip(pSwapMultiAct)
            if swap or not self.isActionAtomic():
                # totally swap action for another (copied so edits stay local)
                self.action = list(random.choice(multiActs))

            # change some value in action
            if not swap or flip(pChangeMultiAct):
                changed = False
                while not changed or flip(pChangeMultiAct):
                    index = random.randint(0, len(self.action) - 1)
                    self.action[index] += random.gauss(0, .15)
                    self.action = list(np.clip(self.action, 0, 1))
                    changed = True
    else:
        # team action: never the current action and never the parent team
        candidates = [
            t for t in allTeams
            if t is not self.action and t is not parentTeam
        ]
        if candidates:
            self.action = random.choice(candidates)

    if not self.isActionAtomic():
        # register the reference on the (possibly unchanged) team action
        self.action.numLearnersReferencing += 1
def mutateInstructions_def(self, mutateParams):
    """Edit ``self.instructions`` until at least one edit has been applied.

    Every pass attempts, in this order, to delete a row, overwrite one field
    of a row, swap two rows and insert a fresh random row.  Each attempt only
    happens when ``flip`` succeeds for the matching probability in
    ``mutateParams`` ("pInstDel", "pInstMut", "pInstSwp", "pInstAdd").
    Deleting and swapping additionally need more than one row.

    Field 0 of a row is drawn from {0, 1}; fields 1, 2 and 3 are drawn from
    ``0 .. mutateParams[key] - 1`` with key "nOperations", "nDestinations"
    and "inputSize" respectively.
    """
    # mutateParams key bounding each field; field 0 is a plain 0/1 value
    boundKeys = {1: "nOperations", 2: "nDestinations", 3: "inputSize"}

    mutated = False
    while not mutated:
        # deletion
        if len(self.instructions) > 1 and flip(mutateParams["pInstDel"]):
            row = random.randint(0, len(self.instructions) - 1)
            self.instructions = np.delete(self.instructions, row, 0)
            mutated = True

        # overwrite a single field
        if flip(mutateParams["pInstMut"]):
            row = random.randint(0, len(self.instructions) - 1)
            col = random.randint(0, 3)
            if col == 0:
                upper = 1
            else:
                upper = mutateParams[boundKeys[col]] - 1
            self.instructions[row, col] = random.randint(0, upper)
            mutated = True

        # swap two distinct rows
        if len(self.instructions) > 1 and flip(mutateParams["pInstSwp"]):
            first, second = random.sample(range(len(self.instructions)), 2)
            # the fancy-indexed right-hand side is a copy, so this is a safe swap
            self.instructions[[first, second]] = \
                self.instructions[[second, first]]
            mutated = True

        # insertion
        if flip(mutateParams["pInstAdd"]):
            position = random.randint(0, len(self.instructions))
            newRow = (
                random.randint(0, 1),
                random.randint(0, mutateParams["nOperations"] - 1),
                random.randint(0, mutateParams["nDestinations"] - 1),
                random.randint(0, mutateParams["inputSize"] - 1),
            )
            self.instructions = np.insert(
                self.instructions, position, newRow, 0)
            mutated = True
def mutateInstructions(self, pDel, pAdd, pSwp, pMut):
    """Edit ``self.instructions`` until at least one edit has been applied.

    Every pass attempts, in this order: delete a row (``pDel``), overwrite
    one field of a row (``pMut``), swap two rows (``pSwp``) and insert a new
    random row (``pAdd``).  Each attempt is gated by ``flip`` of its
    probability; deleting and swapping also need more than one row.

    Field 0 of a row is drawn from {0, 1}; fields 1, 2 and 3 are drawn below
    ``Program.operationRange``, ``Program.destinationRange`` and
    ``Program.sourceRange`` respectively.
    """
    # Program attribute bounding fields 1..3 (field 0 is a plain 0/1 value)
    rangeAttrs = ("operationRange", "destinationRange", "sourceRange")

    mutated = False
    while not mutated:
        # deletion
        if len(self.instructions) > 1 and flip(pDel):
            row = random.randint(0, len(self.instructions) - 1)
            self.instructions = np.delete(self.instructions, row, 0)
            mutated = True

        # overwrite a single field
        if flip(pMut):
            row = random.randint(0, len(self.instructions) - 1)
            col = random.randint(0, 3)
            if col == 0:
                upper = 1
            else:
                upper = getattr(Program, rangeAttrs[col - 1]) - 1
            self.instructions[row, col] = random.randint(0, upper)
            mutated = True

        # swap two distinct rows
        if len(self.instructions) > 1 and flip(pSwp):
            first, second = random.sample(range(len(self.instructions)), 2)
            # the fancy-indexed right-hand side is a copy, so this is a safe swap
            self.instructions[[first, second]] = \
                self.instructions[[second, first]]
            mutated = True

        # insertion
        if flip(pAdd):
            position = random.randint(0, len(self.instructions))
            newRow = (
                random.randint(0, 1),
                random.randint(0, Program.operationRange - 1),
                random.randint(0, Program.destinationRange - 1),
                random.randint(0, Program.sourceRange - 1),
            )
            self.instructions = np.insert(
                self.instructions, position, newRow, 0)
            mutated = True
def mutate(self, mutateParams):
    """Mutate the instructions until they differ from their starting state.

    A fresh ``uuid4`` id is assigned first.  Each pass may delete a row,
    overwrite one field of a row, swap two rows and insert a new random row,
    each gated by ``flip`` of "pInstDel", "pInstMut", "pInstSwp" and
    "pInstAdd" from ``mutateParams``.  The loop repeats while the
    instructions still compare equal (``np.array_equal``) to the snapshot
    taken on entry, so edits that cancel out do not count.

    Returns:
        ``self``.
    """
    snapshot = copy.deepcopy(self.instructions)

    # a mutated program counts as a new individual, so it gets a new id
    self.id = uuid.uuid4()

    # mutateParams key bounding fields 1..3; field 0 is a plain 0/1 value
    boundKeys = {1: "nOperations", 2: "nDestinations", 3: "inputSize"}

    while np.array_equal(self.instructions, snapshot):
        # deletion
        if len(self.instructions) > 1 and flip(mutateParams["pInstDel"]):
            row = random.randint(0, len(self.instructions) - 1)
            self.instructions = np.delete(self.instructions, row, 0)

        # overwrite a single field
        if flip(mutateParams["pInstMut"]):
            row = random.randint(0, len(self.instructions) - 1)
            col = random.randint(0, 3)
            if col == 0:
                upper = 1
            else:
                upper = mutateParams[boundKeys[col]] - 1
            self.instructions[row, col] = random.randint(0, upper)

        # swap two distinct rows
        if len(self.instructions) > 1 and flip(mutateParams["pInstSwp"]):
            first, second = random.sample(range(len(self.instructions)), 2)
            # the fancy-indexed right-hand side is a copy, so this is a safe swap
            self.instructions[[first, second]] = \
                self.instructions[[second, first]]

        # insertion
        if flip(mutateParams["pInstAdd"]):
            position = random.randint(0, len(self.instructions))
            newRow = (
                random.randint(0, 1),
                random.randint(0, mutateParams["nOperations"] - 1),
                random.randint(0, mutateParams["nDestinations"] - 1),
                random.randint(0, mutateParams["inputSize"] - 1),
            )
            self.instructions = np.insert(
                self.instructions, position, newRow, 0)

    return self
def mutate_real(self, mutateParams, parentTeam, teams, pActAtom, learner_id):
    """Mutate this action object: optionally its program, then its target.

    Parameters:
        mutateParams: dict read for "actionCodes" and "actionLengths" (and
            passed through to ``self.program.mutate``).
        parentTeam: team that must not be chosen as the new team action.
        teams: candidate teams for a team action.
        pActAtom: probability handed to ``flip`` to choose an atomic action.
        learner_id: id of the owning learner; its ``str`` form is what is
            stored in each team's ``inLearners`` list.

    Returns:
        ``self``.

    When only the current action code is available, that code is re-selected
    rather than crashing on an empty option list.  The new code is chosen
    before any ``inLearners`` bookkeeping is touched, so a failure to choose
    (no action codes at all) cannot leave a dangling reference.
    """
    # first maybe mutate just program
    if self.actionLength > 0 and flip(0.5):
        self.program.mutate(mutateParams)

    if flip(pActAtom):
        # atomic action: prefer a code different from the current one
        allCodes = mutateParams["actionCodes"]
        if self.actionCode is not None:
            options = [code for code in allCodes if code != self.actionCode]
        else:
            options = allCodes
        if len(options) == 0:
            # only the current code exists, so fall back to re-picking it
            options = allCodes

        newCode = random.choice(options)

        # let our current team know we won't be pointing to them anymore
        if not self.isAtomic():
            self.teamAction.inLearners.remove(str(learner_id))

        self.actionCode = newCode
        self.actionLength = mutateParams["actionLengths"][self.actionCode]
        self.teamAction = None
    else:
        # team action: any team except the current one and the parent
        selection_pool = [
            t for t in teams
            if t is not self.teamAction and t is not parentTeam
        ]

        # with no valid option the action is left untouched
        if selection_pool:
            # let our current team know we won't be pointing to them anymore
            if not self.isAtomic():
                self.teamAction.inLearners.remove(str(learner_id))

            self.teamAction = random.choice(selection_pool)

            # let the new team know we're pointing to them
            self.teamAction.inLearners.append(str(learner_id))

    return self
def mutation_add(self, probability, maxTeamSize, selection_pool):
    """Add learners from ``selection_pool`` to this team.

    Parameters:
        probability: chance passed to ``flip`` for the first addition; it is
            multiplied by its initial value after every addition.
        maxTeamSize: upper bound on ``len(self.learners)``; values <= 0
            disable the bound.
        selection_pool: learners that may be added.  The caller's list is
            not modified; a filtered copy is rebuilt after every addition.

    Returns:
        List of the learners that were added, in order.

    Raises:
        Exception: if ``probability`` is >= 1.0 (checked only after the
            nothing-to-do early exit).
    """
    base_probability = float(probability)

    # nothing can be added: zero chance, nothing to pick, or team already full
    team_is_full = maxTeamSize > 0 and len(self.learners) >= maxTeamSize
    if probability == 0.0 or len(selection_pool) == 0 or team_is_full:
        return []

    if probability >= 1.0:
        # a certain flip would drain the whole selection pool
        raise Exception("pLrnAdd is greater than or equal to 1.0!")

    picked = []
    while flip(probability):
        # stop once the size cap is reached or the pool is exhausted
        if maxTeamSize > 0 and len(self.learners) >= maxTeamSize:
            break
        if len(selection_pool) == 0:
            break

        # decrease next chance
        probability *= base_probability

        choice = random.choice(selection_pool)
        picked.append(choice)
        self.addLearner(choice)

        # never offer an already added learner again
        selection_pool = [
            candidate for candidate in selection_pool
            if candidate not in picked
        ]

    return picked
def mutate_def(self, mutateParams, parentTeam, teams, pActAtom):
    """Mutate the program and/or the action until at least one was mutated.

    Each pass flips ``mutateParams["pProgMut"]`` to decide on a program
    mutation and then ``mutateParams["pActMut"]`` to decide on an action
    mutation; passes repeat while neither flip succeeded.

    Parameters:
        mutateParams: dict of mutation settings, forwarded to both mutators.
        parentTeam, teams, pActAtom: forwarded to ``self.actionObj.mutate``.

    Returns:
        ``self``.
    """
    while True:
        programHit = flip(mutateParams["pProgMut"])
        if programHit:
            self.program.mutate(mutateParams)

        actionHit = flip(mutateParams["pActMut"])
        if actionHit:
            self.actionObj.mutate(mutateParams, parentTeam, teams, pActAtom,
                                  learner_id=self.id)

        if programHit or actionHit:
            return self
def mutate(self, pMutProg, pMutAct, pActAtom, atomics, parentTeam, allTeams,
           pDelInst, pAddInst, pSwpInst, pMutInst, multiActs, pSwapMultiAct,
           pChangeMultiAct, uniqueProgThresh, inputs=None, outputs=None,
           update=True):
    """Mutate the program and/or the action until at least one was mutated.

    Each pass flips ``pMutProg`` to decide on a program mutation and then
    ``pMutAct`` to decide on an action mutation; passes repeat while neither
    flip succeeded.  The program mutation receives ``pMutProg`` as its
    repeat probability and ``len(self.registers)`` as the register count;
    the remaining arguments are forwarded unchanged to
    ``self.program.mutate`` and ``self.mutateAction``.
    """
    while True:
        programHit = flip(pMutProg)
        if programHit:
            self.program.mutate(pMutProg, pDelInst, pAddInst, pSwpInst,
                                pMutInst, len(self.registers),
                                uniqueProgThresh, inputs=inputs,
                                outputs=outputs, update=update)

        actionHit = flip(pMutAct)
        if actionHit:
            self.mutateAction(pActAtom, atomics, allTeams, parentTeam,
                              multiActs, pSwapMultiAct, pChangeMultiAct)

        if programHit or actionHit:
            return
def mutateInstructions(self, pDel, pAdd, pSwp, pMut):
    """Edit the instruction list until at least one edit has been applied.

    ``self.instructions`` is a mutable sequence of integer-encoded
    instructions whose bit width is ``sum(Program.instructionLengths)``.
    Every pass attempts, in this order: delete an instruction (``pDel``),
    flip one bit of an instruction (``pMut``), swap two instructions
    (``pSwp``) and insert a new random instruction (``pAdd``).  Each attempt
    is gated by ``flip`` of its probability.

    Deleting and swapping need more than one instruction; bit-flipping needs
    at least one.  A new instruction may be inserted at any position,
    including the very end.
    """
    changed = False
    while not changed:
        # maybe delete instruction
        if len(self.instructions) > 1 and flip(pDel):
            del self.instructions[random.randint(
                0, len(self.instructions) - 1)]
            changed = True

        # maybe mutate an instruction (flip a bit); needs something to mutate
        if len(self.instructions) > 0 and flip(pMut):
            idx = random.randint(0, len(self.instructions) - 1)
            totalLen = sum(Program.instructionLengths)
            bit = random.randint(0, totalLen - 1)
            self.instructions[idx] = bitFlip(
                self.instructions[idx], bit, totalLen)
            changed = True

        # maybe swap two instructions
        if len(self.instructions) > 1 and flip(pSwp):
            idx1, idx2 = random.sample(range(len(self.instructions)), 2)
            self.instructions[idx1], self.instructions[idx2] = (
                self.instructions[idx2], self.instructions[idx1])
            changed = True

        # maybe add instruction
        if flip(pAdd):
            maxInst = 2**sum(Program.instructionLengths) - 1
            # upper bound is len (not len - 1) so appending is possible and
            # an empty list does not make randint raise
            self.instructions.insert(
                random.randint(0, len(self.instructions)),
                random.randint(0, maxInst))
            changed = True
def mutate(self, pMutRep, pDelInst, pAddInst, pSwpInst, pMutInst, regSize,
           uniqueProgThresh, inputs=None, outputs=None, update=True,
           maxMuts=100):
    """Mutate this program's instructions.

    Two modes:

    * ``inputs`` and ``outputs`` both given: mutate (and ``self.update()``)
      up to ``maxMuts`` times, stopping early once the program is distinct.
      The program is executed on every recorded input with fresh zeroed
      registers of size ``regSize``; it is distinct when register 0 is never
      within ``uniqueProgThresh`` of the matching recorded output.
      ``inputs[i][j]`` is paired with ``outputs[i][j]``.
    * otherwise: mutate once, then keep mutating while ``flip(pMutRep)``
      succeeds, and finally call ``self.update()`` if ``update`` is true.

    ``pDelInst``, ``pAddInst``, ``pSwpInst`` and ``pMutInst`` are forwarded
    to ``self.mutateInstructions``.
    """
    if inputs is None or outputs is None:
        # mutations repeatedly, random amount (always at least one)
        self.mutateInstructions(pDelInst, pAddInst, pSwpInst, pMutInst)
        while flip(pMutRep):
            self.mutateInstructions(pDelInst, pAddInst, pSwpInst, pMutInst)

        if update:
            self.update()
        return

    def matchesRecordedOutput():
        # True as soon as one recorded output is reproduced within threshold
        for learnerIdx, learnerInputs in enumerate(inputs):
            learnerOutputs = outputs[learnerIdx]
            for sampleIdx, sample in enumerate(learnerInputs):
                recorded = learnerOutputs[sampleIdx]
                registers = np.zeros(regSize)
                Program.execute(sample, registers, self.modes,
                                self.operations, self.destinations,
                                self.sources)
                if abs(recorded - registers[0]) < uniqueProgThresh:
                    return True
        return False

    # mutate until distinct from others, bounded by the mutation budget
    budget = maxMuts
    while budget > 0:
        budget -= 1
        self.mutateInstructions(pDelInst, pAddInst, pSwpInst, pMutInst)
        # execution reads the decoded fields, so they are refreshed first
        self.update()
        if not matchesRecordedOutput():
            break
def mutation_mutate(self, probability, mutateParams, teams):
    """Give every learner currently on the team a chance to be replaced by
    a mutated copy.

    For each learner selected by ``flip(probability)`` a new ``Learner`` is
    built from the old one's program, action object, register count and id,
    added to the team, mutated, and then the old learner is removed.

    Returns:
        A pair ``(id_map, replacements)``: ``id_map`` maps ``str`` of each
        replaced learner's id to ``str`` of its replacement's id, and
        ``replacements`` lists the new learners in creation order.
    """
    id_map = {}
    replacements = []

    # Iterate over a snapshot of the roster.  Otherwise learners added by
    # this very loop could be mutated again, leaving 'ghost learners' that
    # registered themselves in the inLearners of the teams they point to but
    # were replaced before the trainer ever tracked them, so those teams
    # would keep a record of a learner that no longer exists.
    for old in list(self.learners):
        if not flip(probability):
            continue

        # The team's only atomic learner must stay atomic, so it gets a
        # probability above 1; everyone else uses the configured chance.
        if self.numAtomicActions() == 1 and old.isActionAtomic():
            atomic_chance = 1.1
        else:
            atomic_chance = mutateParams['pActAtom']

        fresh = Learner(mutateParams, old.program, old.actionObj,
                        len(old.registers), old.id)
        replacements.append(fresh)

        # Must add before mutating so the new learner already has this team
        # in its inTeams.
        self.addLearner(fresh)
        fresh.mutate(mutateParams, self, teams, atomic_chance)

        # The original leaves the team once its replacement is in place.
        self.removeLearner(old)

        id_map[str(old.id)] = str(fresh.id)

    return id_map, replacements
def mutation_delete(self, probability):
    """Remove learners from this team.

    Parameters:
        probability: chance passed to ``flip`` for the first deletion; it is
            multiplied by its initial value after every deletion.

    Returns:
        List of the learners that were removed, in order.

    Raises:
        Exception: if ``probability`` is >= 1.0, or if the team has no
            learner with an atomic action.

    Deletion only continues while the team has more than two learners, and
    the last learner with an atomic action is never chosen.
    """
    base_probability = float(probability)

    if probability == 0.0:
        return []

    if probability >= 1.0:
        # a certain flip would strip the team of every learner it may lose
        raise Exception("pLrnDel is greater than or equal to 1.0!")

    # a team without any atomic action is already in an invalid state
    if self.numAtomicActions() < 1:
        raise Exception(
            "Less than one atomic action in team! This shouldn't happen",
            self)

    removed = []
    while flip(probability) and len(self.learners) > 2:
        # decrease next chance
        probability *= base_probability

        candidates = self.learners
        if self.numAtomicActions() <= 1:
            # only one atomic learner left, so it is excluded from the draw
            candidates = [
                member for member in self.learners
                if not member.isActionAtomic()
            ]

        victim = random.choice(candidates)
        removed.append(victim)
        self.removeLearner(victim)

    return removed
def mutate(self, pDelLrn, pAddLrn, pMutLrn, allLearners, pMutProg, pMutAct,
           pActAtom, atomics, allTeams, pDelInst, pAddInst, pSwpInst,
           pMutInst, multiActs, pSwapMultiAct, pChangeMultiAct,
           uniqueProgThresh, inputs=None, outputs=None, update=True):
    """Mutate this team: delete learners, add learners, then mutate learners.

    Parameters:
        pDelLrn, pAddLrn: initial deletion / addition probabilities; each is
            multiplied by itself after every successful step.
        pMutLrn: per-learner probability of being replaced by a mutated copy.
        allLearners: pool from which new learners are drawn.
        remaining arguments: forwarded unchanged to ``Learner.mutate``.

    Deletion stops once only two learners remain and never removes the last
    learner with an atomic action.  Addition skips learners already on the
    team and learners whose action is this team; it stops when no such
    learner is left instead of raising ``IndexError``.
    """
    # delete some learners
    p = pDelLrn
    while flip(p) and len(self.learners) > 2:  # must have >= 2 learners
        p *= pDelLrn  # decrease next chance

        # choose non-atomic learners if only one atomic remaining
        atomicIsSpare = self.numAtomicActions() > 1
        learner = random.choice([
            lrnr for lrnr in self.learners
            if atomicIsSpare or not lrnr.isActionAtomic()
        ])
        self.removeLearner(learner)

    # add some learners
    p = pAddLrn
    while flip(p):
        candidates = [
            lrnr for lrnr in allLearners
            if lrnr not in self.learners and lrnr.action is not self
        ]
        if not candidates:
            # nothing left that could legally join this team
            break

        p *= pAddLrn  # decrease next chance
        self.addLearner(random.choice(candidates))

    # give chance to mutate all learners; iterate over a snapshot because
    # the roster is edited inside the loop
    for learner in list(self.learners):
        if not flip(pMutLrn):
            continue

        if self.numAtomicActions() == 1 and learner.isActionAtomic():
            pActAtom0 = 1  # action must be kept atomic if only one
        else:
            pActAtom0 = pActAtom

        # must remove then re-add fresh mutated learner
        self.removeLearner(learner)
        newLearner = Learner(learner=learner)
        newLearner.mutate(pMutProg, pMutAct, pActAtom0, atomics, self,
                          allTeams, pDelInst, pAddInst, pSwpInst, pMutInst,
                          multiActs, pSwapMultiAct, pChangeMultiAct,
                          uniqueProgThresh, inputs=inputs, outputs=outputs,
                          update=update)
        self.addLearner(newLearner)