Example #1
0
    def step(self, action):
        """Advance the simulated system by a single time step.

        Args:
            action (float): The action of the agent. A value in the range [0, 1].
        """
        # Integrate the equations of motion for one step and store the result.
        next_state = integrate(action, self.__state, 1)
        self.__state = next_state
Example #2
0
    def __non_markov(self, network, max_steps, testing):
        """Evaluate a network on the non-Markovian double pole-balancing task.

        Only three inputs are provided (cart position and both pole angles);
        no velocity information is given to the network.

        Args:
            network: phenotype exposing ``forward(inputs)`` whose first
                output is used as the action.
            max_steps (int): maximum number of simulation steps.
            testing (bool): when True, print the state at the failure step.

        Returns:
            tuple: ``(F, steps)`` where ``F`` is Gruau's fitness and
            ``steps`` is how long the poles stayed balanced.
        """
        # Wiggle terms for Gruau's fitness, summed over the last 100 steps.
        last_values = []

        steps = 0
        while steps < max_steps:
            inputs = [
                self.__state[0] / self.cart_limit,  # cart's initial position
                self.__state[2] / self.angle_limit,  # pole_1 initial angle
                self.__state[4] / self.angle_limit,  # pole_2 initial angle
            ]

            # Activate the neural network and advance system state
            action = network.forward(inputs)[0]

            if steps < 1 or steps % 2 == 0:
                # No action for first time step and only every second time step
                action = 0.5

            self.__state = integrate(action, self.__state, 1)

            if self.__outside_bounds():
                # network failed to solve the task
                if testing:
                    print(
                        f"Failed at step {steps} \t {self.__state[0]:.2f} \t {self.__state[2]:.2f} \t {self.__state[4]:.2f}"
                    )
                break

            # Wiggle term: |cart pos| + |cart vel| + |pole_1 angle| + |pole_1 vel|
            den = abs(self.__state[0]) + abs(self.__state[1]) + \
                  abs(self.__state[2]) + abs(self.__state[3])
            last_values.append(den)

            # BUG FIX: was `== 100`, which capped the window at 99 values
            # despite the "last 100" intent.
            if len(last_values) > 100:
                last_values.pop(0)  # we only need to keep the last 100 values

            steps += 1

        # compute Gruau's fitness
        if steps > 100:
            # the denominator is computed only for the last 100 time steps
            jiggle = sum(last_values)
            F = 0.1 * steps / 1000.0 + 0.9 * 0.75 / jiggle
        else:
            F = 0.1 * steps / 1000.0

        # BUG FIX: previously returned (steps, steps), silently discarding
        # the fitness F that was just computed; sibling versions of this
        # method return (F, steps) and callers unpack (fitness, score).
        return (F, steps)
Example #3
0
    def __non_markov(self, network, max_steps, testing):
        """Evaluate a network on the non-Markovian double pole-balancing task.

        Only three inputs are provided (cart position and both pole angles);
        no velocity information is given to the network.

        Args:
            network: phenotype exposing ``pactivate(inputs)`` whose first
                output lies in [-1, 1].
            max_steps (int): maximum number of simulation steps.
            testing (bool): when True, print the state at the failure step.

        Returns:
            tuple: ``(F, steps)`` where ``F`` is Gruau's fitness and
            ``steps`` is how long the poles stayed balanced.
        """
        # Wiggle terms for Gruau's fitness, summed over the last 100 steps.
        last_values = []

        steps = 0
        while steps < max_steps:
            inputs = [
                self.__state[0] / 4.80,  # cart's initial position
                self.__state[2] / 0.52,  # pole_1 initial angle
                self.__state[4] / 0.52   # pole_2 initial angle
            ]

            # activate the neural network
            output = network.pactivate(inputs)
            # advances one time step
            action = 0.5 * (output[0] + 1.0)  # maps [-1,1] onto [0,1]
            self.__state = integrate(action, self.__state, 1)

            if self.__outside_bounds():
                # network failed to solve the task
                if testing:
                    # BUG FIX: `print "..."` is a syntax error under Python 3;
                    # print() with a single argument behaves identically in
                    # Python 2 and 3.
                    print("Failed at step {0:d} \t {1:+1.2f} \t {2:+1.2f} \t {3:+1.2f}".format(
                        steps, self.__state[0], self.__state[2],
                        self.__state[4]))
                break

            # Wiggle term: |cart pos| + |cart vel| + |pole_1 angle| + |pole_1 vel|
            den = abs(self.__state[0]) + abs(self.__state[1]) + \
                  abs(self.__state[2]) + abs(self.__state[3])
            last_values.append(den)

            # BUG FIX: was `== 100`, which capped the window at 99 values
            # despite the "last 100" intent.
            if len(last_values) > 100:
                last_values.pop(0)  # we only need to keep the last 100 values

            steps += 1

        # compute Gruau's fitness
        if steps > 100:
            # the denominator is computed only for the last 100 time steps
            jiggle = sum(last_values)
            F = 0.1 * steps / 1000.0 + 0.9 * 0.75 / jiggle
        else:
            F = 0.1 * steps / 1000.0

        return (F, steps)
Example #4
0
    def __non_markov(self, network, max_steps, testing):
        """Evaluate a network on the non-Markovian double pole-balancing task.

        Only three inputs are provided (cart position and both pole angles);
        no velocity information is given to the network.

        Args:
            network: phenotype exposing ``pactivate(inputs)`` whose first
                output lies in [-1, 1].
            max_steps (int): maximum number of simulation steps.
            testing (bool): when True, print the state at the failure step.

        Returns:
            tuple: ``(F, steps)`` where ``F`` is Gruau's fitness and
            ``steps`` is how long the poles stayed balanced.
        """
        # Wiggle terms for Gruau's fitness, summed over the last 100 steps.
        last_values = []

        steps = 0
        while steps < max_steps:
            inputs = [self.__state[0] / 4.80,  # cart's initial position
                      self.__state[2] / 0.52,  # pole_1 initial angle
                      self.__state[4] / 0.52]  # pole_2 initial angle

            # activate the neural network
            output = network.pactivate(inputs)
            # advances one time step
            action = 0.5 * (output[0] + 1.0)  # maps [-1,1] onto [0,1]
            self.__state = integrate(action, self.__state, 1)

            if self.__outside_bounds():
                # network failed to solve the task
                if testing:
                    # BUG FIX: `print "..."` is a syntax error under Python 3;
                    # print() with a single argument behaves identically in
                    # Python 2 and 3.
                    print("Failed at step %d \t %+1.2f \t %+1.2f \t %+1.2f"
                          % (steps, self.__state[0], self.__state[2], self.__state[4]))
                break

            # Wiggle term: |cart pos| + |cart vel| + |pole_1 angle| + |pole_1 vel|
            den = abs(self.__state[0]) + abs(self.__state[1]) + \
                  abs(self.__state[2]) + abs(self.__state[3])
            last_values.append(den)

            # BUG FIX: was `== 100`, which capped the window at 99 values
            # despite the "last 100" intent.
            if len(last_values) > 100:
                last_values.pop(0)  # we only need to keep the last 100 values

            steps += 1

        # compute Gruau's fitness
        if steps > 100:
            # the denominator is computed only for the last 100 time steps
            jiggle = sum(last_values)
            F = 0.1 * steps / 1000.0 + 0.9 * 0.75 / jiggle
        else:
            F = 0.1 * steps / 1000.0

        return (F, steps)
Example #5
0
    def run(self, testing=False):
        """Runs the cart-pole experiment and evaluates the population.

        Args:
            testing (bool): when True, print diagnostic state information;
                in the markov branch the process exits on failure.
        """

        if self.__markov:
            # markov experiment: full system's information is provided to the network
            for genome in self.__population:
                # genome to phenotype
                assert len(genome.inputs) == 6, "There must be 6 inputs to the network"
                net = RNN.create(genome)

                self.__initial_state()

                if testing:
                    # cart's position, first pole's angle, second pole's angle
                    print(
                        f"{self.__state[0]:.2f} \t {self.__state[2]:.2f} \t {self.__state[4]:.2f}"
                    )

                steps = 0

                while steps < 100000:
                    # Inputs are normalized by the respective state limits.
                    inputs = [
                        self.__state[0] / 4.80,  # cart's initial position
                        self.__state[1] / 2.00,  # cart's initial speed
                        self.__state[2] / 0.52,  # pole_1 initial angle
                        self.__state[3] / 2.00,  # pole_1 initial angular velocity
                        self.__state[4] / 0.52,  # pole_2 initial angle
                        self.__state[5] / 2.00,  # pole_2 initial angular velocity
                    ]

                    # activate the neural network
                    output = net.forward(inputs)
                    # BUG FIX: forward() returns a sequence (the non-markov
                    # path of this version uses network.forward(inputs)[0]);
                    # passing the whole output list as the action was a bug.
                    # NOTE(review): assumes the first output is already in
                    # [0, 1] — the old [-1,1]->[0,1] mapping was removed;
                    # confirm against the RNN implementation.
                    action = output[0]
                    # advances one time step
                    self.__state = integrate(action, self.__state, 1)

                    if self.__outside_bounds():
                        # network failed to solve the task
                        if testing:
                            print(
                                f"Failed at step {steps} \t {self.__state[0]:.2f} \t {self.__state[2]:.2f} \t {self.__state[4]:.2f}"
                            )
                            sys.exit(0)
                        else:
                            break
                    steps += 1

                genome.fitness = float(steps)  # the higher the better

        else:
            # non-markovian: no velocity information is provided (only 3 inputs)
            for genome in self.__population:
                assert len(genome.inputs) == 3, "There must be 3 inputs to the network"
                net = RNN.create(genome)
                self.__initial_state()

                genome.fitness, score = self.__non_markov(net, 1000, testing)
                genome.score = score

            # we need to make sure that the found solution is robust enough and good at
            # generalizing for several different initial conditions, so the champion
            # from each generation (i.e., the one with the highest F) passes for a
            # generalization test (the criteria here was defined by Gruau)
            best = max(self.__population,
                       key=lambda g: g.fitness)  # selects the best network
            if self.print_status:
                print(f"\t\nBest chromosome of generation: {best.key}")

            # **********************#
            #  GENERALIZATION TEST  #
            # **********************#

            # first: can it balance for at least 100k steps?
            best_net = RNN.create(best)
            best_net.reset()
            self.__initial_state()  # reset initial state
            # long non-markovian test
            if self.print_status:
                print("Starting the 100k test...")
            score = self.__non_markov(best_net, 100000, testing)[1]

            if score > 99999:
                if self.print_status:
                    print(
                        "\tWinner passed the 100k test! Starting the generalization test..."
                    )
                # second: now let's try 625 different initial conditions
                balanced = self.__generalization_test(best_net, testing)

                if balanced > 200:
                    if self.print_status:
                        print(
                            f"\tWinner passed the generalization test with score: {balanced}\n"
                        )
                    # set chromosome's fitness to 100k (and ceases the simulation)
                    best.fitness = 100000
                    best.score = balanced
                else:
                    if self.print_status:
                        print(
                            f"\tWinner failed the generalization test with score: {balanced}\n"
                        )

            else:
                if self.print_status:
                    print(
                        f"\tWinner failed at the 100k test with score {score}\n"
                    )
Example #6
0
    def run(self, testing=False):
        """Runs the cart-pole experiment and evaluates the population.

        Args:
            testing (bool): when True, print diagnostic state information;
                in the markov branch the process exits on failure.
        """
        # BUG FIX throughout: Python 2 `print "..."` statements are syntax
        # errors under Python 3; single-argument print(...) calls behave
        # identically in both Python 2 and 3.

        if self.__markov:
            # markov experiment: full system's information is provided to the network
            for chromo in self.__population:
                # chromosome to phenotype
                assert chromo.num_inputs == 6, "There must be 6 inputs to the network"
                net = nn.create_phenotype(chromo)

                self.__initial_state()

                if testing:
                    # cart's position, first pole's angle, second pole's angle
                    print("{0:f} \t {1:f} \t {2:f}".format(self.__state[0], self.__state[2], self.__state[4]))

                steps = 0

                while steps < 100000:
                    inputs = [self.__state[0] / 4.80,  # cart's initial position
                              self.__state[1] / 2.00,  # cart's initial speed
                              self.__state[2] / 0.52,  # pole_1 initial angle
                              self.__state[3] / 2.00,  # pole_1 initial angular velocity
                              self.__state[4] / 0.52,  # pole_2 initial angle
                              self.__state[5] / 2.00]  # pole_2 initial angular velocity

                    # activate the neural network
                    output = net.pactivate(inputs)
                    # maps [-1,1] onto [0,1]
                    action = 0.5 * (output[0] + 1.0)
                    # advances one time step
                    self.__state = integrate(action, self.__state, 1)

                    if self.__outside_bounds():
                        # network failed to solve the task
                        if testing:
                            print("Failed at step {0:d} \t {1:+1.2f} \t {2:+1.2f} \t {3:+1.2f}".format(
                                steps, self.__state[0], self.__state[2], self.__state[4]))
                            sys.exit(0)
                        else:
                            break
                    steps += 1

                chromo.fitness = float(steps)  # the higher the better

        else:
            # non-markovian: no velocity information is provided (only 3 inputs)
            for chromo in self.__population:
                assert chromo.num_inputs == 3, "There must be 3 inputs to the network"
                net = nn.create_phenotype(chromo)
                self.__initial_state()

                chromo.fitness, score = self.__non_markov(net, 1000, testing)

            # we need to make sure that the found solution is robust enough and good at
            # generalizing for several different initial conditions, so the champion
            # from each generation (i.e., the one with the highest F) passes for a
            # generalization test (the criteria here was defined by Gruau)
            best = max(self.__population)  # selects the best network
            if self.print_status:
                print("\t\nBest chromosome of generation: {0:d}".format(best.ID))

            # **********************#
            #  GENERALIZATION TEST  #
            # **********************#

            # first: can it balance for at least 100k steps?
            best_net = nn.create_phenotype(best)
            best_net.flush()
            self.__initial_state()  # reset initial state
            # long non-markovian test
            if self.print_status:
                print("Starting the 100k test...")
            score = self.__non_markov(best_net, 100000, testing)[1]

            if score > 99999:
                if self.print_status:
                    print("\tWinner passed the 100k test! Starting the generalization test...")
                # second: now let's try 625 different initial conditions
                balanced = self.__generalization_test(best_net, testing)

                if balanced > 200:
                    if self.print_status:
                        print("\tWinner passed the generalization test with score: {0:d}\n".format(balanced))
                    # set chromosome's fitness to 100k (and ceases the simulation)
                    best.fitness = 100000
                    best.score = balanced
                else:
                    if self.print_status:
                        print("\tWinner failed the generalization test with score: {0:d}\n".format(balanced))

            else:
                if self.print_status:
                    print("\tWinner failed at the 100k test with score {0:d}\n ".format(score))