def eval_genomes(genomes, config):
    """Score each genome with one episode of at most 200 steps in ``env``.

    The genome's network is fed the current observation and its raw output
    is handed to ``env.step`` unchanged.  Fitness is the accumulated reward
    plus the largest ``abs(observation[0])`` and ``abs(observation[1])``
    seen after any step (both peaks start from 0).

    The genome id and the final fitness are printed for every genome.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is overwritten on each genome.
        config: forwarded to ``helper.create_net``.
    """
    for gid, candidate in genomes:
        candidate.fitness = 0
        network = helper.create_net(candidate, config)

        episode_return = 0
        state = env.reset()
        peak_abs_pos = 0
        peak_abs_speed = 0
        print(gid)

        for _step in range(200):
            state, reward, done, _info = env.step(network.activate(state))
            episode_return += reward

            # Peaks are taken over post-step observations only; the initial
            # observation from reset() does not contribute.
            peak_abs_pos = max(peak_abs_pos, abs(state[0]))
            peak_abs_speed = max(peak_abs_speed, abs(state[1]))

            if done:
                break

        # Single episode per genome; no averaging over repeated runs.
        candidate.fitness = episode_return + peak_abs_pos + peak_abs_speed
        print(candidate.fitness)
Esempio n. 2
0
def eval_genomes(genomes, config):
    """Score each genome by its total reward over one rendered episode.

    An episode lasts at most 800 steps.  Steps on which
    ``abs(observation[2]) < 0.001`` are counted (cumulatively, not
    consecutively); once 50 such steps have been seen, that step's reward is
    replaced by -100 and the episode is ended.

    Original author's notes: the environment asks for 1600 steps to "solve"
    (most genomes are expected to fail anyway); 24 inputs (14 features + 10
    lidar) and 4 outputs in [-1, 1], one per joint.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is set on each genome.
        config: forwarded to ``helper.create_net``.
    """
    for _gid, candidate in genomes:
        network = helper.create_net(candidate, config)

        episode_return = 0
        state = env.reset()
        idle_steps = 0

        for _step in range(800):
            env.render()
            state, reward, done, _info = env.step(network.activate(state))

            if abs(state[2]) < 0.001:
                idle_steps += 1

            # Penalise and cut short an agent that has stalled for too long.
            stalled = idle_steps >= 50
            if stalled:
                reward = -100

            episode_return += reward
            if done or stalled:
                break

        candidate.fitness = episode_return
Esempio n. 3
0
def eval_genomes(genomes, config):
    """Score each genome by its total reward over one rendered episode.

    At every step (at most 1000) the index of the network's largest output
    is passed to ``env.step`` as the action; ties resolve to the lowest
    index.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is overwritten on each genome.
        config: forwarded to ``helper.create_net``.
    """
    for _gid, candidate in genomes:
        candidate.fitness = 0
        network = helper.create_net(candidate, config)

        episode_return = 0
        state = env.reset()

        for _step in range(1000):
            env.render()
            outputs = network.activate(state)

            # Argmax over the output vector selects the discrete action.
            choice = max(range(len(outputs)), key=lambda idx: outputs[idx])

            state, reward, done, _info = env.step(choice)
            episode_return += reward

            if done:
                break

        # Single episode per genome; no averaging over repeated runs.
        candidate.fitness = episode_return
Esempio n. 4
0
def eval_genomes(genomes, config):
    """Set each genome's fitness to the negated sum of squared errors.

    The genome's network is activated on every sample of the module-level
    ``x_test`` and its outputs are compared element-wise with the matching
    entry of ``y_test``.  A perfect fit therefore scores 0 and every error
    pushes the fitness below zero.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is set on each genome.
        config: forwarded to ``helper.create_net``.
    """
    for _gid, candidate in genomes:
        network = helper.create_net(candidate, config)

        squared_error_total = 0
        for sample, target in zip(x_test, y_test):
            prediction = network.activate(sample)
            squared_error_total += sum(
                (predicted - expected) ** 2
                for predicted, expected in zip(prediction, target)
            )

        candidate.fitness = -squared_error_total
Esempio n. 5
0
def eval_genomes(genomes, config):
    """Score each genome by its total reward over one episode (<= 200 steps).

    The observation is unpacked into three components; the third is passed
    through ``helper.scale(-8, 8, -1, 1, ...)`` before the network sees it.
    Only the network's first output is used, wrapped in a one-element list
    as the action.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is set on each genome.
        config: forwarded to ``helper.create_net``.
    """
    for _gid, candidate in genomes:
        network = helper.create_net(candidate, config)
        episode_return = 0
        state = env.reset()

        for _step in range(200):
            cos_theta, sin_theta, theta_dot = state
            net_inputs = [
                cos_theta,
                sin_theta,
                helper.scale(-8, 8, -1, 1, theta_dot),
            ]
            command = network.activate(net_inputs)[0]

            state, reward, done, _info = env.step([command])
            episode_return += reward

            if done:
                break

        candidate.fitness = episode_return
Esempio n. 6
0
def eval_genomes(genomes, config):
    """Score each genome on the module-level ``x_test`` / ``y_test`` pairs.

    For every sample the per-sample score is ``bonus - mse`` where

    * ``bonus`` is ``1 - (target_max - output_max) ** 2`` when the argmax of
      the network output matches the argmax of the target, else 0;
    * ``mse`` is the mean squared difference between output and target.

    The fitness is the sum of these per-sample scores.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is set on each genome.
        config: forwarded to ``helper.create_net``.
    """
    for _gid, candidate in genomes:
        network = helper.create_net(candidate, config)

        score = 0
        for sample, target in zip(x_test, y_test):
            output = network.activate(sample)
            predicted_idx = max(range(len(output)), key=lambda j: output[j])
            target_idx = max(range(len(target)), key=lambda j: target[j])

            # Reward a correct class by how close the winning output is to
            # the target value; a wrong class earns nothing.
            if predicted_idx == target_idx:
                bonus = 1 - (target[target_idx] - output[predicted_idx]) ** 2
            else:
                bonus = 0

            squared_diffs = [(out - tgt) ** 2 for out, tgt in zip(output, target)]
            mean_sq_err = sum(squared_diffs) / len(output)

            score += bonus - mean_sq_err

        candidate.fitness = score
def eval_genomes(genomes, config):
    """Score each genome by its total reward over one rendered episode.

    An episode lasts at most 800 steps and the network's output is passed to
    ``env.step`` unchanged.

    Original author's notes: the environment asks for 1600 steps to "solve"
    (most genomes are expected to fail anyway); 24 inputs (14 features + 10
    lidar) and 4 outputs in [-1, 1], one per joint.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is set on each genome.
        config: forwarded to ``helper.create_net``.
    """
    for _gid, candidate in genomes:
        network = helper.create_net(candidate, config)
        episode_return = 0
        state = env.reset()

        for _step in range(800):
            env.render()
            # NOTE(review): the original author remarked that the outputs are
            # standard sigmoid values in [0, 1] that should be rescaled to
            # [-1, 1]; no rescaling is applied here -- confirm this is
            # intended.
            joint_commands = network.activate(state)
            state, reward, done, _info = env.step(joint_commands)
            episode_return += reward
            if done:
                break

        candidate.fitness = episode_return
Esempio n. 8
0
def eval_genomes(genomes, config):
    """Score each genome with one episode of at most 200 steps in ``env``.

    ``observation[0]`` is rescaled from [-1.2, 0.6] to [0, 1] and
    ``observation[1]`` from [-0.07, 0.07] to [-1, 1] before being fed to the
    network.  The index of the largest network output is used as the action
    (ties resolve to the lowest index).

    Fitness is ``total_reward + max_pos + 2 * max_speed`` (accumulated by
    repeated addition), where ``max_pos`` is the largest post-step
    ``observation[0]`` (floored at 0) and ``max_speed`` the largest
    post-step ``abs(observation[1])``.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is overwritten on each genome.
        config: forwarded to ``helper.create_net``.
    """
    for genome_id, genome in genomes:

        genome.fitness = 0

        net = helper.create_net(genome, config)

        total_reward = 0
        observation = env.reset()
        max_pos = 0
        max_speed = 0
        for _ in range(200):
            position_scaled = helper.scale(-1.2, 0.6, 0, 1, observation[0])
            velocity_scaled = helper.scale(-0.07, 0.07, -1, 1, observation[1])

            outputs = net.activate([position_scaled, velocity_scaled])
            # Argmax over the outputs picks the discrete action.
            action = max(range(len(outputs)), key=lambda i: outputs[i])

            observation, reward, done, info = env.step(action)

            total_reward += reward

            max_pos = max(max_pos, observation[0])
            max_speed = max(max_speed, abs(observation[1]))

            if done:
                break

        # max_speed is intentionally added twice.  The formula used to read
        # ``max_speed + (max_speed - min_speed)``, but min_speed was the
        # minimum of absolute values starting from 0 and so was always 0;
        # the dead tracking is dropped and the resulting value is unchanged.
        # NOTE(review): if a true speed *range* was intended, min_speed would
        # need to start at +inf (or track the signed velocity) -- confirm.
        genome.fitness = total_reward + max_pos + max_speed + max_speed
def eval_genomes(genomes, config):
    """Score each genome by its total reward over one episode (<= 1000 steps).

    The network must produce exactly two outputs per activation; they are
    passed to ``env.step`` as a two-element list, unscaled.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is overwritten on each genome.
        config: forwarded to ``helper.create_net``.
    """
    for _gid, candidate in genomes:
        candidate.fitness = 0
        network = helper.create_net(candidate, config)

        episode_return = 0
        state = env.reset()

        for _step in range(1000):
            # Unpacking doubles as a check that there are exactly two outputs.
            engine_main, engine_side = network.activate(state)
            state, reward, done, _info = env.step([engine_main, engine_side])

            episode_return += reward
            if done:
                break

        # Single episode per genome; no averaging over repeated runs.
        candidate.fitness = episode_return
Esempio n. 10
0
def eval_genomes(genomes, config):
    for genome_id, genome in genomes:

        total_reward = 0

        net = helper.create_net(genome, config)