Example #1
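Constructor of an Ising-network agent embodied in a BipedalWalker environment: it allocates bias fields h and couplings J, clamps a joint-angle range and top speed, and sets the inverse temperature Beta and the default simulation length.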
    def __init__(self, netsize, Nsensors=1, Nmotors=1):  # Create an Ising model

        self.size = netsize  # Network size
        self.Ssize = Nsensors  # Number of sensors
        self.Msize = Nmotors  # Number of motors

        self.h = np.zeros(netsize)
        self.J = np.zeros((netsize, netsize))
        self.max_weights = 2

        self.randomize_state()

        self.env = BipedalWalker()
        #self.env.min_position = -1.5 * np.pi / 3
        #self.env.max_position = 8.5 * np.pi / 3
        self.env.goal_position = 1.5 * np.pi / 3
        self.env.max_speed = .045
        self.observation = self.env.reset()
        #self.max_angle_j0 = -0.61 #-35º
        #self.min_angle_j0 = -1.39 #-80º
        self.max_angle_j0 = -1
        self.min_angle_j0 = -1

        self.Beta = 1.0
        self.defaultT = max(100, netsize * 20)

        self.Ssize1 = 0
        #self.maxspeed = self.env.max_speed
        self.Update(-1)
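
The attributes h, J, and Beta parametrize an Ising network: bias fields, pairwise couplings, and inverse temperature. For context, a minimal Glauber update consistent with those attributes; this is a sketch that assumes randomize_state creates a spin vector s of length size, not necessarily what the project's Update method does:

import numpy as np

def glauber_step(model, i):
    # Energy change from flipping spin i under fields h and couplings J
    # (J[i, :] + J[:, i] covers a triangular coupling matrix).
    eDiff = 2 * model.s[i] * (model.h[i]
                              + np.dot(model.J[i, :] + model.J[:, i], model.s))
    # Flip with the Glauber (logistic) acceptance probability at inverse temperature Beta.
    if np.random.rand() < 1.0 / (1.0 + np.exp(model.Beta * eDiff)):
        model.s[i] = -model.s[i]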
Example #2
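Environment factory that selects the walker variant and the per-episode step budget from difficulty flags.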
 def create_env(self, hardcore=False, super_easy=False):
     # Create a BipedalWalker environment
     if hardcore:
         self.max_number_steps = 2000
         self.env = BipedalWalkerHardcore()
     elif super_easy:
         self.max_number_steps = 1600
         self.env = BipedalWalkerSuperEasy()
     else:
         self.max_number_steps = 1600
         self.env = BipedalWalker()
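
A minimal usage sketch, assuming the method above lives on a trainer object (hypothetical name) that exposes env and max_number_steps:

trainer.create_env(hardcore=True)   # BipedalWalkerHardcore with a 2000-step budget
obs = trainer.env.reset()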
Example #3
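Pipe-driven subprocess worker for parallel evaluation: each worker owns one environment, a shared world model (rnn), and a batch of controllers, and services commands sent by the parent process.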
  def _worker(self, rank, pipe, parent_pipe):
    parent_pipe.close()

    rng = np.random.RandomState(rank)

    env = BipedalWalker()
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    rnn = WorldModel(obs_dim, act_dim)
    ctrls = [Controller(obs_dim+rnn.hid_dim, act_dim)
             for _ in range(self.agents_per_worker)]
  
    while True:
      command, data = pipe.recv()

      if command == 'upload_rnn': # data: rnn
        rnn.load_state_dict(data.state_dict())
        pipe.send((None, True))

      elif command == 'upload_ctrl': # data: ([inds], noisy)
        inds, noisy = data
        for ctrl, ind in zip(ctrls, inds):
          if noisy:
            ind += rng.normal(scale=1e-3, size=ind.shape)
          ctrl.load_genotype(ind)
        pipe.send((None, True))

      elif command == 'rollout': # data: random_policy
        rollouts = []
        for ctrl in ctrls:
          env.seed(rng.randint(2**31-1))
          if data: # if rollout with random policy
            trajectory = random_rollout(env)
          else:
            trajectory = rollout(env, rnn, ctrl)
          rollouts.append(trajectory)
        pipe.send((rollouts, True))

      elif command == 'evaluate': # data: None
        evaluations = []
        for ctrl in ctrls:
          env.seed(rng.randint(2**31-1))
          evaluations.append(evaluate(env, rnn, ctrl))
        pipe.send((evaluations, True))

      elif command == 'close': # data: None
        env.close()
        pipe.send((None, True))
        return True

    return False
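
The loop above implements a small command protocol: the parent sends (command, data) tuples over the pipe and receives (result, ok) replies. A parent-side sketch of that handshake (the Pipe/Process wiring is an assumption; in Example #5 the Population class wraps this pattern):

import multiprocessing as mp

parent_pipe, child_pipe = mp.Pipe()
proc = mp.Process(target=population._worker, args=(0, child_pipe, parent_pipe))
proc.start()
child_pipe.close()

parent_pipe.send(('rollout', True))   # True: roll out with a random policy
rollouts, ok = parent_pipe.recv()

parent_pipe.send(('close', None))
parent_pipe.recv()
proc.join()

Example #4
Evolving a tree-based BipedalWalker-v3 policy with genetic programming: the info dict configures the population size, tournament selection, mutation rate, and a termination fitness threshold.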
from bipedal_walker import BipedalWalker
from gp_gym_info import info
import gym
import numpy as np

np.random.seed(1)

info["env_name"] = "BipedalWalker-v3"
info["pop_size"] = 10
info["max_gens"] = 5
info["num_eps"] = 10
info["num_time_steps"] = 100
info["tournament_size"] = 3
info["mutation_rate"] = 0.1
info["term_fit"] = 300
info["max_depth"] = 5

agent = BipedalWalker(info)
best_program = agent.train()
print(best_program)
fitness = agent.fit(best_program,
                    info["num_eps"],
                    info["num_time_steps"],
                    render=False)
print("Fitness: {}".format(fitness))

# Watch the best program for a few rendered episodes.
agent.fit(best_program, 5, 100, render=True)
Example #5
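A full World Models-style training loop: the memory model (rnn) is first trained on random-policy rollouts, then the controller is evolved against it with an evolution strategy, alternating with retraining the rnn on rollouts from the current best controller.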
def main(args):
    print("IT'S DANGEROUS TO GO ALONE! TAKE THIS.")

    np.random.seed(0)
    pt.manual_seed(0)

    env = BipedalWalker()
    env.seed(0)

    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    print(f"Initializing agent (device={device})...")
    rnn = WorldModel(obs_dim, act_dim)
    ctrl = Controller(obs_dim + rnn.hid_dim, act_dim)

    # Adjust population size based on the number of available CPUs.
    num_workers = mp.cpu_count() if args.nproc is None else args.nproc
    num_workers = min(num_workers, mp.cpu_count())
    agents_per_worker = args.popsize // num_workers
    popsize = num_workers * agents_per_worker

    print(f"Initializing population with {popsize} workers...")
    pop = Population(num_workers, agents_per_worker)
    global_mu = np.zeros_like(ctrl.genotype)

    loss_logger = ValueLogger('ha_rnn_loss', bufsize=20)
    best_logger = ValueLogger('ha_ctrl_best', bufsize=100)

    # Train the RNN with random policies.
    print(f"Training M model with a random policy...")
    optimizer = optim.Adam(rnn.parameters(), lr=args.lr)
    train_rnn(rnn,
              optimizer,
              pop,
              random_policy=True,
              num_rollouts=args.num_rollouts,
              logger=loss_logger)
    loss_logger.plot('M model training loss', 'step', 'loss')

    # Upload the trained RNN.
    success = pop.upload_rnn(rnn.cpu())
    assert success

    # Iteratively update controller and RNN.
    for i in range(args.niter):
        # Evolve controllers with the trained RNN.
        print(f"Iter. {i}: Evolving C model...")
        es = EvolutionStrategy(global_mu, args.sigma0, popsize)
        evolve_ctrl(ctrl, es, pop, num_gen=args.num_gen, logger=best_logger)
        best_logger.plot('C model evolution', 'gen', 'fitness')

        # Update the global best individual and upload it (with exploration noise).
        global_mu = np.copy(ctrl.genotype)
        success = pop.upload_ctrl(global_mu, noisy=True)
        assert success

        # Train the RNN with the current best controller.
        print(f"Iter. {i}: Training M model...")
        train_rnn(rnn,
                  optimizer,
                  pop,
                  random_policy=False,
                  num_rollouts=args.num_rollouts,
                  logger=loss_logger)
        loss_logger.plot('M model training loss', 'step', 'loss')

        # Upload the trained RNN.
        success = pop.upload_rnn(rnn.cpu())
        assert success

        # Test run!
        rollout(env, rnn, ctrl, render=True)

    success = pop.close()
    assert success
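
A hypothetical invocation, assuming an argparse front end whose destinations match the attributes read above (nproc, popsize, lr, num_rollouts, sigma0, num_gen, niter); the exact flag spelling is a guess:

python main.py --nproc 8 --popsize 64 --lr 1e-3 --num_rollouts 32 --sigma0 0.1 --num_gen 20 --niter 10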
Example #6
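Environment factory that builds a leg-scaled walker and wraps it with observation frame stacking.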
def create_env(args):
    env = BipedalWalker(args.scale_legs)
    env = frame_stack(env, args)
    return env
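
frame_stack here is project-specific. A rough stand-in using Gym's standard wrapper (an assumption, not the project's implementation; args.frame_stack is a hypothetical attribute giving the stack depth):

from gym.wrappers import FrameStack

def create_env(args):
    env = BipedalWalker(args.scale_legs)
    # Stack the most recent observations so the policy sees short-term history.
    env = FrameStack(env, num_stack=args.frame_stack)
    return env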