def step(self, action):
    """Move the simulated system forward by a single time step.

    The stored state is replaced by the result of integrating it once
    under the given action.

    Args:
        action (float): The agent's action, a value in the range [0, 1].
    """
    next_state = integrate(action, self.__state, 1)
    self.__state = next_state
def __non_markov(self, network, max_steps, testing):
    """Run one balancing trial in which the network sees no velocities.

    On each step the network receives three inputs: state elements 0, 2
    and 4 (cart position and the two pole angles), divided by
    ``self.cart_limit`` / ``self.angle_limit``. The trial stops when
    ``self.__outside_bounds()`` reports a failure or after ``max_steps``
    steps, whichever comes first.

    Args:
        network: Controller exposing ``forward(inputs)``; element 0 of the
            returned sequence is used as the action.
        max_steps (int): Upper bound on the number of simulated steps.
        testing (bool): When true, print the step and state at failure.

    Returns:
        tuple: ``(steps, steps)`` -- the number of steps survived, reported
        both as the fitness and as the score.

    NOTE(review): the first element is the plain step count, not Gruau's
    damping-weighted fitness. The previous body computed that value (and a
    sliding window of state magnitudes) but never returned it, so that dead
    computation has been removed. Confirm that the step count is the
    intended fitness.
    """
    steps = 0
    while steps < max_steps:
        inputs = [
            self.__state[0] / self.cart_limit,   # cart position
            self.__state[2] / self.angle_limit,  # pole 1 angle
            self.__state[4] / self.angle_limit,  # pole 2 angle
        ]

        # The network is evaluated on every step, even when its output is
        # overridden below (presumably so that any internal network state
        # keeps advancing -- confirm against the network implementation).
        action = network.forward(inputs)[0]
        if steps < 1 or steps % 2 == 0:
            # No action for the first time step and on every second step:
            # 0.5 is used as the "no action" value.
            action = 0.5

        self.__state = integrate(action, self.__state, 1)

        if self.__outside_bounds():
            # The network failed to solve the task.
            if testing:
                print(
                    f"Failed at step {steps} \t {self.__state[0]:.2f} \t "
                    f"{self.__state[2]:.2f} \t {self.__state[4]:.2f}"
                )
            break

        steps += 1

    return (steps, steps)
def __non_markov(self, network, max_steps, testing):
    """Run one balancing trial without velocity inputs and score it.

    On each step the network receives state elements 0, 2 and 4 (cart
    position and the two pole angles) scaled by the constants 4.80 and
    0.52. Its first output is mapped from [-1, 1] onto [0, 1] and used as
    the action. The trial stops when ``self.__outside_bounds()`` reports a
    failure or after ``max_steps`` steps.

    Args:
        network: Controller exposing ``pactivate(inputs)``.
        max_steps (int): Upper bound on the number of simulated steps.
        testing (bool): When true, print the step and state at failure.

    Returns:
        tuple: ``(F, steps)`` where ``steps`` is the number of steps
        survived and ``F`` is Gruau's fitness:
        ``0.1 * steps / 1000`` plus, when more than 100 steps were
        survived, ``0.9 * 0.75 / jiggle`` with ``jiggle`` the sum over the
        last 100 steps of ``|state[0]| + |state[1]| + |state[2]| +
        |state[3]|``.
    """
    from collections import deque

    # Sliding window over the most recent per-step magnitudes. maxlen=100
    # keeps exactly the last 100 values; the former append-then-pop(0)
    # logic trimmed the list to 99 entries, one short of what the fitness
    # definition calls for.
    last_values = deque(maxlen=100)

    steps = 0
    while steps < max_steps:
        inputs = [
            self.__state[0] / 4.80,  # cart position
            self.__state[2] / 0.52,  # pole 1 angle
            self.__state[4] / 0.52,  # pole 2 angle
        ]

        # Activate the neural network.
        output = network.pactivate(inputs)

        # Advance one time step.
        action = 0.5 * (output[0] + 1.0)  # maps [-1, 1] onto [0, 1]
        self.__state = integrate(action, self.__state, 1)

        if self.__outside_bounds():
            # The network failed to solve the task.
            if testing:
                # print() with a single argument emits the same text under
                # Python 2 and Python 3.
                print("Failed at step {0:d} \t {1:+1.2f} \t {2:+1.2f} \t {3:+1.2f}".format(
                    steps, self.__state[0], self.__state[2], self.__state[4]))
            break

        last_values.append(
            abs(self.__state[0]) + abs(self.__state[1])
            + abs(self.__state[2]) + abs(self.__state[3])
        )
        steps += 1

    # Compute Gruau's fitness; the damping term only applies once more than
    # 100 steps have been survived.
    F = 0.1 * steps / 1000.0
    if steps > 100:
        jiggle = sum(last_values)
        F += 0.9 * 0.75 / jiggle

    return (F, steps)
def __non_markov(self, network, max_steps, testing):
    """Run one balancing trial without velocity inputs and score it.

    On each step the network receives state elements 0, 2 and 4 (cart
    position and the two pole angles) scaled by the constants 4.80 and
    0.52. Its first output is mapped from [-1, 1] onto [0, 1] and used as
    the action. The trial stops when ``self.__outside_bounds()`` reports a
    failure or after ``max_steps`` steps.

    Args:
        network: Controller exposing ``pactivate(inputs)``.
        max_steps (int): Upper bound on the number of simulated steps.
        testing (bool): When true, print the step and state at failure.

    Returns:
        tuple: ``(F, steps)`` where ``steps`` is the number of steps
        survived and ``F`` is Gruau's fitness:
        ``0.1 * steps / 1000`` plus, when more than 100 steps were
        survived, ``0.9 * 0.75 / jiggle`` with ``jiggle`` the sum over the
        last 100 steps of ``|state[0]| + |state[1]| + |state[2]| +
        |state[3]|``.
    """
    from collections import deque

    # Sliding window over the most recent per-step magnitudes. maxlen=100
    # keeps exactly the last 100 values; the former append-then-pop(0)
    # logic trimmed the list to 99 entries, one short of what the fitness
    # definition calls for.
    last_values = deque(maxlen=100)

    steps = 0
    while steps < max_steps:
        inputs = [
            self.__state[0] / 4.80,  # cart position
            self.__state[2] / 0.52,  # pole 1 angle
            self.__state[4] / 0.52,  # pole 2 angle
        ]

        # Activate the neural network.
        output = network.pactivate(inputs)

        # Advance one time step.
        action = 0.5 * (output[0] + 1.0)  # maps [-1, 1] onto [0, 1]
        self.__state = integrate(action, self.__state, 1)

        if self.__outside_bounds():
            # The network failed to solve the task.
            if testing:
                # print() with a single argument emits the same text under
                # Python 2 and Python 3.
                print("Failed at step %d \t %+1.2f \t %+1.2f \t %+1.2f"
                      % (steps, self.__state[0], self.__state[2], self.__state[4]))
            break

        last_values.append(
            abs(self.__state[0]) + abs(self.__state[1])
            + abs(self.__state[2]) + abs(self.__state[3])
        )
        steps += 1

    # Compute Gruau's fitness; the damping term only applies once more than
    # 100 steps have been survived.
    F = 0.1 * steps / 1000.0
    if steps > 100:
        jiggle = sum(last_values)
        F += 0.9 * 0.75 / jiggle

    return (F, steps)
def run(self, testing=False):
    """Run the cart-pole experiment and evaluate the whole population.

    In the Markov setting (``self.__markov`` is true) every genome's
    network receives all six state variables and its fitness is the number
    of steps (at most 100000) it keeps the system within bounds.

    In the non-Markov setting each genome is evaluated with
    ``self.__non_markov`` for up to 1000 steps. The genome with the highest
    fitness is then put through a 100k-step run and, if it survives, the
    generalization test; when it balances more than 200 of the test cases
    its fitness is set to 100000.

    Args:
        testing (bool): When true, print the initial and failing states.
            In the Markov setting a failure while testing terminates the
            process through ``sys.exit(0)``.
    """
    if self.__markov:
        # Markov experiment: full system information is given to the network.
        for genome in self.__population:
            # genome to phenotype
            assert len(genome.inputs) == 6, "There must be 6 inputs to the network"
            net = RNN.create(genome)
            self.__initial_state()

            if testing:
                # cart position, first pole angle, second pole angle
                print(
                    f"{self.__state[0]:.2f} \t {self.__state[2]:.2f} \t {self.__state[4]:.2f}"
                )

            steps = 0
            while steps < 100000:
                inputs = [
                    self.__state[0] / 4.80,  # cart position
                    self.__state[1] / 2.00,  # cart speed
                    self.__state[2] / 0.52,  # pole 1 angle
                    self.__state[3] / 2.00,  # pole 1 angular velocity
                    self.__state[4] / 0.52,  # pole 2 angle
                    self.__state[5] / 2.00,  # pole 2 angular velocity
                ]

                # Activate the neural network.
                output = net.forward(inputs)

                # The action is a single value (step() documents a float),
                # so take the first network output instead of handing the
                # whole output sequence to integrate().
                action = output[0]

                # Advance one time step.
                self.__state = integrate(action, self.__state, 1)

                if self.__outside_bounds():
                    # The network failed to solve the task.
                    if testing:
                        print(
                            f"Failed at step {steps} \t {self.__state[0]:.2f} \t "
                            f"{self.__state[2]:.2f} \t {self.__state[4]:.2f}"
                        )
                        sys.exit(0)
                    else:
                        break

                steps += 1

            genome.fitness = float(steps)  # the higher the better
        return

    # Non-Markovian: no velocity information is provided (only 3 inputs).
    for genome in self.__population:
        assert len(genome.inputs) == 3, "There must be 3 inputs to the network"
        net = RNN.create(genome)
        self.__initial_state()

        genome.fitness, score = self.__non_markov(net, 1000, testing)
        genome.score = score

    # We need to make sure that the found solution is robust enough and good
    # at generalizing for several different initial conditions, so the
    # champion of each generation (the one with the highest fitness) goes
    # through a generalization test (the criteria were defined by Gruau).
    best = max(self.__population, key=lambda g: g.fitness)
    if self.print_status:
        print(f"\t\nBest chromosome of generation: {best.key}")

    # ---------------------
    # GENERALIZATION TEST
    # ---------------------

    # First: can it balance for at least 100k steps?
    best_net = RNN.create(best)
    best_net.reset()
    self.__initial_state()  # reset initial state

    # Long non-Markovian test.
    if self.print_status:
        print("Starting the 100k test...")
    score = self.__non_markov(best_net, 100000, testing)[1]

    if score > 99999:
        if self.print_status:
            print(
                "\tWinner passed the 100k test! Starting the generalization test..."
            )

        # Second: now try 625 different initial conditions.
        balanced = self.__generalization_test(best_net, testing)

        if balanced > 200:
            if self.print_status:
                print(
                    f"\tWinner passed the generalization test with score: {balanced}\n"
                )
            # Set the chromosome's fitness to 100k (and cease the simulation).
            best.fitness = 100000
            best.score = balanced
        else:
            if self.print_status:
                print(
                    f"\tWinner failed the generalization test with score: {balanced}\n"
                )
    else:
        if self.print_status:
            print(
                f"\tWinner failed at the 100k test with score {score}\n"
            )
def run(self, testing=False):
    """Run the cart-pole experiment and evaluate the whole population.

    In the Markov setting (``self.__markov`` is true) every chromosome's
    network receives all six state variables and its fitness is the number
    of steps (at most 100000) it keeps the system within bounds.

    In the non-Markov setting each chromosome is evaluated with
    ``self.__non_markov`` for up to 1000 steps. The best chromosome is then
    put through a 100k-step run and, if it survives, the generalization
    test; when it balances more than 200 of the test cases its fitness is
    set to 100000.

    All output uses single-argument ``print(...)`` calls, which emit the
    same text under Python 2 and Python 3.

    Args:
        testing (bool): When true, print the initial and failing states.
            In the Markov setting a failure while testing terminates the
            process through ``sys.exit(0)``.
    """
    if self.__markov:
        # Markov experiment: full system information is given to the network.
        for chromo in self.__population:
            # chromosome to phenotype
            assert chromo.num_inputs == 6, "There must be 6 inputs to the network"
            net = nn.create_phenotype(chromo)
            self.__initial_state()

            if testing:
                # cart position, first pole angle, second pole angle
                print("{0:f} \t {1:f} \t {2:f}".format(
                    self.__state[0], self.__state[2], self.__state[4]))

            steps = 0
            while steps < 100000:
                inputs = [
                    self.__state[0] / 4.80,  # cart position
                    self.__state[1] / 2.00,  # cart speed
                    self.__state[2] / 0.52,  # pole 1 angle
                    self.__state[3] / 2.00,  # pole 1 angular velocity
                    self.__state[4] / 0.52,  # pole 2 angle
                    self.__state[5] / 2.00,  # pole 2 angular velocity
                ]

                # Activate the neural network.
                output = net.pactivate(inputs)

                # Map the first output from [-1, 1] onto [0, 1].
                action = 0.5 * (output[0] + 1.0)

                # Advance one time step.
                self.__state = integrate(action, self.__state, 1)

                if self.__outside_bounds():
                    # The network failed to solve the task.
                    if testing:
                        print("Failed at step {0:d} \t {1:+1.2f} \t {2:+1.2f} \t {3:+1.2f}".format(
                            steps, self.__state[0], self.__state[2], self.__state[4]))
                        sys.exit(0)
                    else:
                        break

                steps += 1

            chromo.fitness = float(steps)  # the higher the better
        return

    # Non-Markovian: no velocity information is provided (only 3 inputs).
    for chromo in self.__population:
        assert chromo.num_inputs == 3, "There must be 3 inputs to the network"
        net = nn.create_phenotype(chromo)
        self.__initial_state()

        chromo.fitness, score = self.__non_markov(net, 1000, testing)

    # We need to make sure that the found solution is robust enough and good
    # at generalizing for several different initial conditions, so the
    # champion of each generation (the one with the highest F) goes through
    # a generalization test (the criteria were defined by Gruau).
    #
    # max() is called without a key, so the selection relies on the
    # chromosomes' own ordering (presumably by fitness -- confirm against
    # the chromosome class).
    best = max(self.__population)
    if self.print_status:
        print("\t\nBest chromosome of generation: {0:d}".format(best.ID))

    # ---------------------
    # GENERALIZATION TEST
    # ---------------------

    # First: can it balance for at least 100k steps?
    best_net = nn.create_phenotype(best)
    best_net.flush()
    self.__initial_state()  # reset initial state

    # Long non-Markovian test.
    if self.print_status:
        print("Starting the 100k test...")
    score = self.__non_markov(best_net, 100000, testing)[1]

    if score > 99999:
        if self.print_status:
            print("\tWinner passed the 100k test! Starting the generalization test...")

        # Second: now try 625 different initial conditions.
        balanced = self.__generalization_test(best_net, testing)

        if balanced > 200:
            if self.print_status:
                print("\tWinner passed the generalization test with score: {0:d}\n".format(balanced))
            # Set the chromosome's fitness to 100k (and cease the simulation).
            best.fitness = 100000
            best.score = balanced
        else:
            if self.print_status:
                print("\tWinner failed the generalization test with score: {0:d}\n".format(balanced))
    else:
        if self.print_status:
            print("\tWinner failed at the 100k test with score {0:d}\n ".format(score))