def run_ES(population, i):
    # Optimize the genome of the first individual with evostra's
    # EvolutionStrategy. get_reward and POPULATION_SIZE are globals defined
    # elsewhere in the script; note that start= is not an argument of stock
    # evostra's run(), so this assumes a customized version.
    model = population[0].genome
    es = EvolutionStrategy(model, get_reward, population_size=POPULATION_SIZE,
                           sigma=0.25, learning_rate=0.03, decay=0.998,
                           num_threads=2)
    es.run(5000, print_step=5, start=i)
    optimized = es.get_weights()
    return optimized
def find_shapelets_es(timeseries, labels, max_len=100, min_len=1,
                      population_size=100, iterations=25, verbose=True,
                      sigma=0.1, learning_rate=0.001):
    def cost(shapelet):
        # Fitness of a candidate: the quality score returned by check_candidate.
        return check_candidate(timeseries, labels, shapelet)[0]

    candidates = np.array(generate_candidates(timeseries, labels, max_len, min_len))
    # Seed the search with a single randomly chosen candidate shapelet.
    start = candidates[np.random.choice(range(len(candidates)),
                                        size=population_size)][0][0]
    es = EvolutionStrategy(start, cost, population_size=population_size,
                           sigma=sigma, learning_rate=learning_rate)
    es.run(iterations, print_step=1)
    best_shapelet = es.get_weights()
    return best_shapelet
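# Hypothetical usage sketch of find_shapelets_es (not from the original code).
# It assumes generate_candidates and check_candidate from the surrounding
# shapelet module are in scope, and guesses the input format as a list of
# 1-D numpy arrays with one binary label per series.
import numpy as np

rng = np.random.RandomState(0)
toy_series = [rng.randn(50) for _ in range(20)]   # 20 series of length 50
toy_labels = rng.randint(0, 2, size=20)           # binary class labels

best = find_shapelets_es(toy_series, toy_labels, max_len=20, min_len=5,
                         population_size=50, iterations=10)
print("best shapelet:", best)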
import argparse
import pickle

from evostra import EvolutionStrategy
from evostra.models import FeedForwardNetwork

parser = argparse.ArgumentParser(description='Evolution Strategies.')
parser.add_argument('--env', default="Humanoid-v2")
# A flag is used instead of type=bool, which would treat any non-empty
# string (including "False") as True.
parser.add_argument('--render', action='store_true')
args = parser.parse_args()

# env_info and make_get_reward are helpers defined elsewhere in this script.
observationSpace, actionSpace = env_info(args.env)
# A feed-forward neural network mapping observations to actions,
# with two hidden layers of sizes 32 and 16.
model = FeedForwardNetwork(layer_sizes=[observationSpace, 32, 16, actionSpace])
get_reward = make_get_reward(args.env, model, args.render)

# If your task is computationally expensive, you can use num_threads > 1 to run
# multiple processes; num_threads=-1 uses all cores available on the machine.
# Here we use a single process because the task is cheap and more processes
# would decrease performance due to the IPC overhead.
es = EvolutionStrategy(model.get_weights(), get_reward, population_size=20,
                       sigma=0.1, learning_rate=0.03, decay=0.995, num_threads=1)
es.run(1000, print_step=100)

with open(args.env + ".pkl", 'wb') as fp:
    pickle.dump(es.get_weights(), fp)

# while True:
#     print(get_reward(es.get_weights(), True))
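# The script above assumes env_info and make_get_reward are defined earlier in
# the file. A minimal sketch of what they might look like (an assumption, not
# the original helpers), using the classic gym API the script relies on:
import gym


def env_info(env_id):
    # Report observation/action dimensionality for a continuous-control env.
    env = gym.make(env_id)
    return env.observation_space.shape[0], env.action_space.shape[0]


def make_get_reward(env_id, model, render=False):
    env = gym.make(env_id)

    def get_reward(weights, render_episode=render):
        # Score one rollout of the model with the given weights.
        model.set_weights(weights)
        observation = env.reset()
        total_reward, done = 0.0, False
        while not done:
            if render_episode:
                env.render()
            action = model.predict(observation)
            observation, reward, done, _ = env.step(action)
            total_reward += reward
        return total_reward

    return get_reward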
import pickle
import random

import numpy as np
from evostra import EvolutionStrategy
from ple import PLE
from ple.games.flappybird import FlappyBird


class Agent:
    AGENT_HISTORY_LENGTH = 1
    NUM_OF_ACTIONS = 2
    POPULATION_SIZE = 15
    EPS_AVG = 1
    SIGMA = 0.1
    LEARNING_RATE = 0.03
    INITIAL_EXPLORATION = 0.0
    FINAL_EXPLORATION = 0.0
    EXPLORATION_DEC_STEPS = 100000

    def __init__(self):
        self.model = Model()
        self.game = FlappyBird(pipe_gap=125)
        self.env = PLE(self.game, fps=30, display_screen=False)
        self.env.init()
        self.env.getGameState = self.game.getGameState
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.POPULATION_SIZE, self.SIGMA,
                                    self.LEARNING_RATE)
        self.exploration = self.INITIAL_EXPLORATION

    def get_predicted_action(self, sequence):
        prediction = self.model.predict(np.array(sequence))
        x = np.argmax(prediction)
        # 119 is the PLE key code for "flap"; None means "do nothing".
        return 119 if x == 1 else None

    def load(self, filename='weights.pkl'):
        with open(filename, 'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()

    def get_observation(self):
        state = self.env.getGameState()
        # list() is needed in Python 3, where dict.values() is a view.
        return np.array(list(state.values()))

    def save(self, filename='weights.pkl'):
        with open(filename, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def play(self, episodes):
        self.env.display_screen = True
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            self.env.reset_game()
            observation = self.get_observation()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            score = 0
            while not done:
                action = self.get_predicted_action(sequence)
                reward = self.env.act(action)
                observation = self.get_observation()
                sequence = sequence[1:]
                sequence.append(observation)
                done = self.env.game_over()
                if self.game.getScore() > score:
                    score = self.game.getScore()
                    print("score: %d" % score)
        self.env.display_screen = False

    def train(self, iterations):
        self.es.run(iterations, print_step=1)

    def get_reward(self, weights):
        total_reward = 0.0
        self.model.set_weights(weights)
        for episode in range(self.EPS_AVG):
            self.env.reset_game()
            observation = self.get_observation()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                self.exploration = max(
                    self.FINAL_EXPLORATION,
                    self.exploration - self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS)
                if random.random() < self.exploration:
                    action = random.choice([119, None])
                else:
                    action = self.get_predicted_action(sequence)
                reward = self.env.act(action)
                # Tiny random noise breaks reward ties between perturbations.
                reward += random.choice([0.0001, -0.0001])
                total_reward += reward
                observation = self.get_observation()
                sequence = sequence[1:]
                sequence.append(observation)
                done = self.env.game_over()
        return total_reward / self.EPS_AVG
import pickle
import random

import gym
import numpy as np
from evostra import EvolutionStrategy


class Agent:
    AGENT_HISTORY_LENGTH = 1
    POPULATION_SIZE = 25
    EPS_AVG = 1
    SIGMA = 0.5
    LEARNING_RATE = 0.1
    INITIAL_EXPLORATION = 1.0
    FINAL_EXPLORATION = 0.0
    EXPLORATION_DEC_STEPS = 100000

    def __init__(self):
        self.env = gym.make('BipedalWalker-v2')
        self.model = Model()
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.POPULATION_SIZE, self.SIGMA,
                                    self.LEARNING_RATE)
        self.exploration = self.INITIAL_EXPLORATION

    def get_predicted_action(self, sequence):
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def load(self, filename='weights.pkl'):
        with open(filename, 'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()

    def save(self, filename='weights.pkl'):
        with open(filename, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def play(self, episodes, render=True):
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            total_reward = 0
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                if render:
                    self.env.render()
                action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
            print("total reward:", total_reward)

    def train(self, iterations):
        self.es.run(iterations, print_step=1)

    def get_reward(self, weights):
        total_reward = 0.0
        self.model.set_weights(weights)
        for episode in range(self.EPS_AVG):
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                # Anneal exploration from INITIAL_EXPLORATION to FINAL_EXPLORATION.
                self.exploration = max(
                    self.FINAL_EXPLORATION,
                    self.exploration - self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS)
                if random.random() < self.exploration:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
        return total_reward / self.EPS_AVG
import pickle
import random

import gym
import numpy as np
from evostra import EvolutionStrategy

# GYM_ENV is the gym environment id (e.g. 'BipedalWalker-v2'),
# assumed to be defined elsewhere in the script.


class Agent:
    agent_hist = 1
    population = 50
    eps_avg = 1
    sigma = 0.2
    # Learning rate
    lr = 0.1
    init_explore = 0.9
    final_explore = 0.1
    explore_steps = 1E+5

    def __init__(self):
        # Initialize environment, model, algorithm, and exploration.
        self.env = gym.make(GYM_ENV)
        self.model = Model()
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.population, self.sigma, self.lr)
        self.exploration = self.init_explore

    def get_predicted_action(self, sequence):
        # Retrieve the predicted action.
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def load(self, filename='weights.pkl'):
        # Load weights for agent play.
        with open(filename, 'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()

    def save(self, filename='weights.pkl'):
        # Save weights to a pickle file.
        with open(filename, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def play(self, episodes, render=True):
        # Run the model in the OpenAI Gym environment.
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            total_reward = 0
            observation = self.env.reset()
            sequence = [observation] * self.agent_hist
            done = False
            while not done:
                if render:
                    self.env.render()
                action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
            print("Total reward:", total_reward)

    def train(self, iterations):
        # Begin training.
        self.es.run(iterations, print_step=1)

    def get_reward(self, weights):
        # Initialize reward.
        total_reward = 0.0
        self.model.set_weights(weights)
        # Calculate reward averaged over eps_avg episodes.
        for episode in range(self.eps_avg):
            observation = self.env.reset()
            sequence = [observation] * self.agent_hist
            done = False
            while not done:
                self.exploration = max(
                    self.final_explore,
                    self.exploration - self.init_explore / self.explore_steps)
                if random.random() < self.exploration:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
        return total_reward / self.eps_avg
import pickle
import random

import gym
import numpy as np
from evostra import EvolutionStrategy


class Agent:
    """The agent class."""

    ENV_ID = 'BipedalWalker-v2'
    # Number of history observations used in action prediction.
    AGENT_HISTORY_LENGTH = 1
    POPULATION_SIZE = 20
    EPS_AVG = 1
    SIGMA = 0.1
    LEARNING_RATE = 0.01
    # The following three parameters control the exploration probability.
    # It starts at INITIAL_EXPLORATION and decays to FINAL_EXPLORATION over
    # EXPLORATION_DEC_STEPS steps.
    INITIAL_EXPLORATION = 1.0
    FINAL_EXPLORATION = 0.0
    EXPLORATION_DEC_STEPS = 1000000

    def __init__(self):
        """Initialize the agent."""
        # Initialize the openai-gym environment.
        self.env = gym.make(self.ENV_ID)
        # Uncomment the following lines if you want to record video:
        # self.env = gym.wrappers.Monitor(self.env, "{}_monitor".format(self.ENV_ID),
        #                                 lambda episode_id: True, force=True)
        # Initialize the training model.
        self.model = Model()
        # Initialize the evolution strategy of evostra.
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.POPULATION_SIZE, self.SIGMA,
                                    self.LEARNING_RATE)
        self.exploration = self.INITIAL_EXPLORATION
        self.exploration_dec = self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS

    def train(self, iterations=100, print_step=1, filename='weights.pkl'):
        """Train the model."""
        self.es.run(iterations, print_step=print_step)
        self.save(filename)

    def load(self, filename='weights.pkl'):
        """Load the model weights from file."""
        with open(filename, 'rb') as fp:
            self.model.set_weights(pickle.load(fp, encoding='bytes'))
        self.es.weights = self.model.get_weights()

    def save(self, filename='weights.pkl'):
        """Save the weights of the current model into a file."""
        with open(filename, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def play(self, episodes=1, render=True):
        """Play the agent for the given number of episodes."""
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            total_reward = 0
            # Get the initial observation.
            observation = self.env.reset()
            # Fill the observation sequence with the initial observation,
            # repeated AGENT_HISTORY_LENGTH times.
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                if render:
                    # Visualize.
                    self.env.render()
                action = self.get_predicted_action(sequence)
                # Get the results of the action.
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                # Shift the observation sequence to include the new one.
                sequence = sequence[1:]
                sequence.append(observation)
            print("total reward: ", total_reward)

    def get_predicted_action(self, sequence):
        """Get the model's predicted action based on a sequence of states."""
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def get_reward(self, weights):
        """Get the reward of the current model, averaged over EPS_AVG tests."""
        total_reward = 0.0
        self.model.set_weights(weights)
        # Run tests EPS_AVG times.
        for episode in range(self.EPS_AVG):
            # Get the initial observation.
            observation = self.env.reset()
            # Fill the observation sequence with the initial observation,
            # repeated AGENT_HISTORY_LENGTH times.
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                self.exploration = max(self.FINAL_EXPLORATION,
                                       self.exploration - self.exploration_dec)
                # Randomize exploration.
                if random.random() < self.exploration:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_predicted_action(sequence)
                # Get the results of the action.
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                # Shift the observation sequence to include the new one.
                sequence = sequence[1:]
                sequence.append(observation)
        return total_reward / self.EPS_AVG
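# A minimal driver for an Agent class like the one above (hypothetical usage,
# not from the original code; it assumes the class and its Model are importable):
if __name__ == '__main__':
    agent = Agent()
    agent.train(iterations=100)   # optimize the weights with evostra
    agent.play(episodes=3)        # watch the trained policy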
import pickle
import random
import time

import gym
import numpy as np
from evostra import EvolutionStrategy


class Agent:
    def __init__(self, model, training_steps=500, environment='BipedalWalker-v2',
                 AGENT_HISTORY_LENGTH=1, POPULATION_SIZE=50, EPS_AVG=1,
                 SIGMA=0.1, LEARNING_RATE=0.01, INITIAL_EXPLORATION=1.0,
                 FINAL_EXPLORATION=0.0, EXPLORATION_DEC_STEPS=10000,
                 num_thread=1, LR_mode=0):
        self.env = gym.make(environment)
        self.model = model
        self.exploration = INITIAL_EXPLORATION
        self.training_steps = training_steps
        self.AGENT_HISTORY_LENGTH = AGENT_HISTORY_LENGTH
        self.POPULATION_SIZE = POPULATION_SIZE
        self.EPS_AVG = EPS_AVG
        self.SIGMA = SIGMA
        self.LEARNING_RATE = LEARNING_RATE
        self.INITIAL_EXPLORATION = INITIAL_EXPLORATION
        self.FINAL_EXPLORATION = FINAL_EXPLORATION
        self.EXPLORATION_DEC_STEPS = EXPLORATION_DEC_STEPS
        self.num_thread = num_thread
        self.LR_mode = LR_mode
        # Note: LR_mode is not part of stock evostra; this snippet appears to
        # target a customized EvolutionStrategy.
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.POPULATION_SIZE, self.SIGMA,
                                    self.LEARNING_RATE, num_threads=num_thread,
                                    LR_mode=self.LR_mode)

    def get_predicted_action(self, sequence):
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def load(self, model_file):
        with open(model_file, 'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()

    def save(self, model_file):
        with open(model_file, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def train(self, iterations):
        print('Training')
        self.es.run(iterations, print_step=1)
        optimized_weights = self.es.get_weights()
        self.model.set_weights(optimized_weights)

    def play(self, episodes, render=True):
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            print('On episode number {}'.format(episode))
            total_reward = 0
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                if render:
                    self.env.render()
                action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
            print("total reward:", total_reward)

    def get_reward(self, weights):
        total_reward = 0.0
        self.model.set_weights(weights)
        for episode in range(self.EPS_AVG):
            start_time = time.time()
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                self.exploration = max(
                    self.FINAL_EXPLORATION,
                    self.exploration - self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS)
                if random.random() < self.exploration:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
            # print("total reward: ", total_reward)
            # print('Finished in {} seconds'.format(time.time() - start_time))
        return total_reward / self.EPS_AVG
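# Every Agent above depends on a Model class that is not shown. A minimal
# sketch of one (an assumption, not the original): a tanh policy network whose
# weight matrices evostra perturbs directly. The layer sizes 24 -> 16 -> 16 -> 4
# are guesses matching BipedalWalker's observation and action dimensions, with
# AGENT_HISTORY_LENGTH = 1.
import numpy as np


class Model:
    def __init__(self):
        self.weights = [np.zeros((24, 16)),
                        np.zeros((16, 16)),
                        np.zeros((16, 4))]

    def predict(self, inp):
        # Flatten the observation sequence and pass it through the layers.
        out = np.expand_dims(inp.flatten(), 0)
        for layer in self.weights:
            out = np.tanh(np.dot(out, layer))
        return out[0]

    def get_weights(self):
        return self.weights

    def set_weights(self, weights):
        self.weights = weights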