Example 1
def eval_nn(genotype, nbstep=2000, dump=False, render=False, name=""):
    nn = SimpleNeuralControllerNumpy(5, 2, 2, 10)
    nn.set_parameters(genotype)
    observation = env.reset()
    old_pos = None
    total_dist = 0
    fit = 0

    if dump:
        f = open("traj" + name + ".log", "w")
    for t in range(nbstep):
        if render:
            env.render()
        action = nn.predict(observation)
        observation, reward, done, info = env.step(action)
        pos = info["robot_pos"][:2]
        if dump:
            f.write(" ".join(map(str, pos)) + "\n")
        if old_pos is not None:
            d = math.sqrt((pos[0] - old_pos[0])**2 + (pos[1] - old_pos[1])**2)
            total_dist += d
        old_pos = list(pos)
        if done:
            break
    if dump:
        f.close()
    dist_obj = info["dist_obj"]
    #print("End of eval, total_dist=%f"%(total_dist))

    if done:
        fit = 1

    # Returns the remaining Euclidean distance to the goal and the final (x, y) position.
    return (math.sqrt((pos[0] - env.goalPos[0])**2 +
                      (pos[1] - env.goalPos[1])**2), pos)
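A sketch of how this evaluation might be called to log and visualize a trajectory; best_genotype is a hypothetical genotype obtained from an evolutionary run, and the returned values follow the function above:

# Hypothetical call: renders the episode and writes the trajectory to "traj_best.log".
dist_to_goal, final_pos = eval_nn(best_genotype, dump=True, render=True, name="_best")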
Example 2
def eval_nn(genotype, nbstep=2000, dump=False, render=False, name=""):
    nn = SimpleNeuralControllerNumpy(5, 2, 2, 10)
    nn.set_parameters(genotype)
    observation = env.reset()
    old_pos = None
    total_dist = 0
    if dump:
        f = open("traj" + name + ".log", "w")
    for t in range(nbstep):
        if render:
            env.render()
        action = nn.predict(observation)
        observation, reward, done, info = env.step(action)
        pos = info["robot_pos"][:2]
        if dump:
            f.write(" ".join(map(str, pos)) + "\n")
        if old_pos is not None:
            d = math.sqrt((pos[0] - old_pos[0])**2 + (pos[1] - old_pos[1])**2)
            total_dist += d
        old_pos = list(pos)
        if done:
            break
    if dump:
        f.close()
    dist_obj = info["dist_obj"]
    #print("End of eval, total_dist=%f"%(total_dist))
    return  ## To complete: the evaluation should return the fitness used, as well as the behavioral descriptor resulting from this evaluation
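One way the return above could be completed, following the pattern of Example 1; dist_obj and pos are already computed in the function, and using them as the fitness and the behavioral descriptor is an assumption, not something stated in this snippet:

    # Assumed completion: fitness = remaining distance to the goal,
    # behavioral descriptor = final (x, y) position of the robot.
    return dist_obj, pos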
Example 3
def simulation(env, genotype, display=False):
    global but_atteint
    global size_nn
    nn = SimpleNeuralControllerNumpy(5, 2, 2, 10)
    if genotype is not None:
        nn.set_parameters(genotype)
    observation = env.reset()
    if display:
        env.enable_display()
    then = time.time()
    but = 0
    for i in range(800):
        env.render()
        action = nn.predict(observation)
        action = [i * env.maxVel for i in action]
        observation, reward, done, info = env.step(action)
        #print("Step %d Obs=%s  reward=%f  dist. to objective=%f  robot position=%s  End of ep=%s" % (i, str(observation), reward, info["dist_obj"], str(info["robot_pos"]), str(done)))
        if display:
            time.sleep(0.01)
        if done:
            but_atteint = True
            but += 1
            break

    now = time.time()
    #print("%d timesteps took %f seconds" % (i, now - then))
    xg, yg = env.goalPos
    x, y, theta = env.get_robot_pos()  # x, y, theta: why theta? TODO
    return but, math.sqrt((x - xg)**2 + (y - yg)**2), [x, y]
Example 4
def eval_nn(genotype, render=False, nbstep=500):
    total_reward = 0
    nn = SimpleNeuralControllerNumpy(4, 1, 2, 5)
    nn.set_parameters(genotype)

    ## to complete

    # Use render to enable or disable the display (it is convenient to disable it during the
    # computations and to enable it only to visualize the results). nbstep is the number of
    # timesteps: the larger it is, the more stable your pendulum will be, but the longer your
    # computations will take. You can therefore adjust this value to speed up or slow down your
    # computations. Use the default value during learning, and a larger value to visualize the
    # behavior of the obtained result (see the usage sketch after this example).
    observation = env.reset()
    for i in range(nbstep):
        if render:
            env.render()
        action = nn.predict(observation)
        if action > 0:
            action = 1
        else:
            action = 0
        observation, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            # print("Episode finished after %d timesteps"%(i+1))
            break

    return total_reward,
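Following the comment above, a sketch of how render and nbstep might be used; best_genotype is hypothetical:

# During evolution: no rendering, default episode length.
fitness = eval_nn(best_genotype)
# After evolution: render the behavior over a longer episode.
eval_nn(best_genotype, render=True, nbstep=2000)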
Example 5
def eval_nn(genotype, render=False, verbose=False):
    sum_reward = 0
    sum_distance = 0
    sum_angle = 0
    _t = 0

    nn = SimpleNeuralControllerNumpy(4, 1, 2, 5)
    nn.set_parameters(genotype)
    observation = _env.reset()
    for t in range(1000):
        _t = t
        sum_distance += abs(observation[0])
        sum_angle += abs(observation[2])

        if render:
            _env.render()
        action = nn.predict(observation)
        if action > 0:
            action = 1
        else:
            action = 0
        observation, reward, done, info = _env.step(action)
        sum_reward += reward
        if done:
            if verbose:
                print("Episode finished after %d timesteps"%(t+1))
            break

    if (_t < 500):
        sum_distance += abs(observation[0] * (500 - _t))
        sum_angle += abs(observation[2] * (500 - _t))
    
    return sum_reward, sum_distance, sum_angle
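If these three values are optimized together with a multi-objective evolutionary algorithm, the fitness definition could look like the sketch below; this assumes the DEAP library, which the snippet does not name, and the weight signs (maximize reward, minimize the accumulated position and angle errors) are also an assumption:

from deap import base, creator

# Assumed weights: maximize the reward, minimize the accumulated distance and angle.
creator.create("FitnessCartpole", base.Fitness, weights=(1.0, -1.0, -1.0))
creator.create("Individual", list, fitness=creator.FitnessCartpole)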
Example 6
def evolution_nn(genotype, render=False, verbose=False):
    hist_distance = []
    hist_angle = []

    nn = SimpleNeuralControllerNumpy(4, 1, 2, 5)
    nn.set_parameters(genotype)
    observation = _env.reset()
    for t in range(1000):
        hist_distance.append(abs(observation[0]))
        hist_angle.append(abs(observation[2]))

        if render:
            _env.render()
        action = nn.predict(observation)
        if action > 0:
            action = 1
        else:
            action = 0
        observation, reward, done, info = _env.step(action)

        if done:
            if verbose:
                print("Episode finished after %d timesteps"%(t+1))
            break

  
    return hist_distance, hist_angle
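A sketch of how the returned histories might be plotted, assuming matplotlib is available, that _env is the CartPole environment (observation[0] is the cart position and observation[2] the pole angle), and that best_genotype was obtained elsewhere:

import matplotlib.pyplot as plt

hist_distance, hist_angle = evolution_nn(best_genotype)
plt.plot(hist_distance, label="|cart position|")
plt.plot(hist_angle, label="|pole angle|")
plt.xlabel("timestep")
plt.legend()
plt.show()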
Example 7
def eval_nn(env, genotype, render=False):
    energie = 500
    nn = SimpleNeuralControllerNumpy(4, 1, 2, 10)
    nn.set_parameters(genotype)
    observation = env.reset()
    x = 0
    y = 0
    for t in range(energie):
        if render:
            env.render()
            time.sleep(0.05)
        action = nn.predict(observation)
        if action > 0:
            action = 1
        else:
            action = 0
        observation, reward, done, info = env.step(action)
        x += abs(observation[0])
        y += abs(observation[2])
        if done:
            break
    x = x / t
    x += (energie - t) * 2.4  # penalty for the remaining timesteps
    y = y / t
    y += (energie - t) * 41.8  # penalty for the remaining timesteps
    return x, y
Example 8
def eval_nn(genotype, render=False):
    total_reward = 0
    nn = SimpleNeuralControllerNumpy(4, 1, 2, 5)
    nn.set_parameters(genotype)
    observation = env.reset()
    for t in range(1000):
        if render:
            env.render()
        action = nn.predict(observation)
        if action > 0:
            action = 1
        else:
            action = 0
        observation, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            print("Episode finished after %d timesteps" % (t + 1))
            break
    return total_reward,
Example 9
### To complete: optimize the neural network parameters with CMA-ES ###

nn = SimpleNeuralControllerNumpy(4, 1, 2, 5)
nn.init_random_params()
# sigma is the initial CMA-ES step size (defined elsewhere).
res = launch_cmaes_full_genotype(nn.get_parameters(),
                                 sigma,
                                 nbeval=1000,
                                 display=True,
                                 ma_func=eval_nn)
nn.set_parameters(res)

env.reset()

r = env.step(env.action_space.sample())  # take a random action
observations = r[0]
reward = r[1]
done = r[2]
print(nn.predict(observations))
for _ in range(1000):
    env.render()
    action = nn.predict(observations)
    if action > 0:
        action = 1
    else:
        action = 0
    r = env.step(action)  # apply the network's action
    observations = r[0]
    reward = r[1]
    done = r[2]
env.close()