def __init__(self, netsize, Nsensors=1, Nmotors=1):
    """Create an Ising-model agent coupled to a BipedalWalker environment.

    Args:
        netsize: total number of spins/units in the network
                 (sensors + hidden + motors).
        Nsensors: number of sensor units.
        Nmotors: number of motor units.
    """
    # Create ising model
    self.size = netsize  # Network size
    self.Ssize = Nsensors  # Number of sensors
    self.Msize = Nmotors  # Number of motors (original comment said "sensors": copy-paste slip)
    self.h = np.zeros(netsize)  # local fields (biases), one per unit
    self.J = np.zeros((netsize, netsize))  # pairwise couplings between units
    self.max_weights = 2  # magnitude cap for weights — presumably enforced by the learning code; TODO confirm
    self.randomize_state()
    self.env = BipedalWalker()
    # Commented-out environment tuning kept from earlier experiments:
    # self.env.min_position = -1.5 * np.pi / 3
    # self.env.max_position = 8.5 * np.pi / 3
    self.env.goal_position = 1.5 * np.pi / 3
    self.env.max_speed = .045
    self.observation = self.env.reset()
    # self.max_angle_j0 = -0.61  # -35º
    # self.min_angle_j0 = -1.39  # -80º
    self.max_angle_j0 = -1
    # NOTE(review): max_angle_j0 == min_angle_j0 collapses the joint-0 range
    # to a single value — confirm this is intentional (the commented-out pair
    # above defined a real range).
    self.min_angle_j0 = -1
    self.Beta = 1.0  # inverse temperature for the Ising updates — TODO confirm against Update()
    self.defaultT = max(100, netsize * 20)  # default simulation length, scales with network size
    self.Ssize1 = 0
    # self.maxspeed = self.env.max_speed
    self.Update(-1)
def create_env(self, hardcore=False, super_easy=False):
    """Create a BipedalWalker environment variant and set the step budget.

    `hardcore` takes precedence over `super_easy`. The hardcore variant is
    given a larger step budget (2000) than the other two (1600).
    """
    if hardcore:
        env_cls, step_budget = BipedalWalkerHardcore, 2000
    elif super_easy:
        env_cls, step_budget = BipedalWalkerSuperEasy, 1600
    else:
        env_cls, step_budget = BipedalWalker, 1600
    self.max_number_steps = step_budget
    self.env = env_cls()
def _worker(self, rank, pipe, parent_pipe):
    """Worker-process loop owning one env, one world model, and a batch of controllers.

    Services commands received over `pipe` until 'close'. Every reply sent
    back is a `(payload, success_flag)` tuple.

    Args:
        rank: worker index; also used to seed this worker's RNG.
        pipe: this worker's end of the duplex pipe to the parent.
        parent_pipe: the parent's end — closed immediately in the child.
    """
    parent_pipe.close()  # child must not hold the parent's pipe end open
    rng = np.random.RandomState(rank)  # per-worker RNG, seeded by rank for reproducibility
    env = BipedalWalker()
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    rnn = WorldModel(obs_dim, act_dim)
    # One controller per agent handled by this worker; input is the
    # observation concatenated with the RNN hidden state.
    ctrls = [Controller(obs_dim + rnn.hid_dim, act_dim) for _ in range(self.agents_per_worker)]
    while True:
        command, data = pipe.recv()
        if command == 'upload_rnn':
            # data: rnn — copy the parent's world-model weights into the local copy
            rnn.load_state_dict(data.state_dict())
            pipe.send((None, True))
        elif command == 'upload_ctrl':
            # data: ([inds], noisy) — one genotype per local controller;
            # `noisy` adds small Gaussian jitter for exploration
            inds, noisy = data
            for ctrl, ind in zip(ctrls, inds):
                if noisy:
                    ind += rng.normal(scale=1e-3, size=ind.shape)
                ctrl.load_genotype(ind)
            pipe.send((None, True))
        elif command == 'rollout':
            # data: random_policy (truthy -> ignore controllers, act randomly)
            rollouts = []
            for ctrl in ctrls:
                env.seed(rng.randint(2**31-1))  # fresh env seed per rollout
                if data:  # if rollout with random policy
                    trajectory = random_rollout(env)
                else:
                    trajectory = rollout(env, rnn, ctrl)
                rollouts.append(trajectory)
            pipe.send((rollouts, True))
        elif command == 'evaluate':
            # data: None — score each local controller on a freshly seeded env
            evaluations = []
            for ctrl in ctrls:
                env.seed(rng.randint(2**31-1))
                evaluations.append(evaluate(env, rnn, ctrl))
            pipe.send((evaluations, True))
        elif command == 'close':
            # data: None — release the env and terminate the worker loop
            env.close()
            pipe.send((None, True))
            return True
    return False  # unreachable: the loop only exits via the 'close' command
from bipedal_walker import BipedalWalker
from gp_gym_info import info
import gym
import numpy as np

# Fix the global RNG so runs are repeatable.
np.random.seed(1)

# GP hyper-parameters for the BipedalWalker task.
_settings = {
    "env_name": "BipedalWalker-v3",
    "pop_size": 10,
    "max_gens": 5,
    "num_eps": 10,
    "num_time_steps": 100,
    "tournament_size": 3,
    "mutation_rate": 0.1,
    "term_fit": 300,
    "max_depth": 5,
}
for _key, _value in _settings.items():
    info[_key] = _value

# Evolve a program, report it, then score it.
agent = BipedalWalker(info)
best_program = agent.train()
print(best_program)

# Headless evaluation first, then a short rendered replay of the winner.
fitness = agent.fit(best_program, info["num_eps"], info["num_time_steps"], render=False)
print("Fitness: {}".format(fitness))
agent.fit(best_program, 5, 100, render=True)
def main(args):
    """World-Models-style training loop for BipedalWalker.

    First trains the M model (world-model RNN) on random-policy rollouts,
    then alternates: evolve the C model (controller) against the trained
    RNN, upload the best controller, and retrain the RNN on its rollouts.
    Ends with a rendered test run of the final agent.
    """
    print("IT'S DANGEROUS TO GO ALONE! TAKE THIS.")
    # Deterministic runs: seed numpy, torch, and the environment.
    np.random.seed(0)
    pt.manual_seed(0)
    env = BipedalWalker()
    env.seed(0)
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    print(f"Initializing agent (device={device})...")
    rnn = WorldModel(obs_dim, act_dim)
    # Controller input is the observation concatenated with the RNN hidden state.
    ctrl = Controller(obs_dim + rnn.hid_dim, act_dim)
    # Adjust population size based on the number of available CPUs.
    num_workers = mp.cpu_count() if args.nproc is None else args.nproc
    num_workers = min(num_workers, mp.cpu_count())
    agents_per_worker = args.popsize // num_workers
    # Effective popsize after rounding down to a multiple of num_workers.
    popsize = num_workers * agents_per_worker
    print(f"Initializing population with {popsize} workers...")
    pop = Population(num_workers, agents_per_worker)
    global_mu = np.zeros_like(ctrl.genotype)  # evolution-strategy mean, starts at zero
    loss_logger = ValueLogger('ha_rnn_loss', bufsize=20)
    best_logger = ValueLogger('ha_ctrl_best', bufsize=100)
    # Train the RNN with random policies.
    print(f"Training M model with a random policy...")
    optimizer = optim.Adam(rnn.parameters(), lr=args.lr)
    train_rnn(rnn, optimizer, pop, random_policy=True,
              num_rollouts=args.num_rollouts, logger=loss_logger)
    loss_logger.plot('M model training loss', 'step', 'loss')
    # Upload the trained RNN.
    success = pop.upload_rnn(rnn.cpu())
    assert success
    # Iteratively update controller and RNN.
    for i in range(args.niter):
        # Evolve controllers with the trained RNN.
        print(f"Iter. {i}: Evolving C model...")
        es = EvolutionStrategy(global_mu, args.sigma0, popsize)
        evolve_ctrl(ctrl, es, pop, num_gen=args.num_gen, logger=best_logger)
        best_logger.plot('C model evolution', 'gen', 'fitness')
        # Update the global best individual and upload them.
        global_mu = np.copy(ctrl.genotype)
        success = pop.upload_ctrl(global_mu, noisy=True)
        assert success
        # Train the RNN with the current best controller.
        print(f"Iter. {i}: Training M model...")
        train_rnn(rnn, optimizer, pop, random_policy=False,
                  num_rollouts=args.num_rollouts, logger=loss_logger)
        loss_logger.plot('M model training loss', 'step', 'loss')
        # Upload the trained RNN.
        success = pop.upload_rnn(rnn.cpu())
        assert success
    # Test run!
    rollout(env, rnn, ctrl, render=True)
    success = pop.close()
    assert success
def create_env(args):
    """Build a BipedalWalker env (with leg scaling) wrapped for frame stacking."""
    base_env = BipedalWalker(args.scale_legs)
    return frame_stack(base_env, args)