def run_ES(population, i):
    # Start the evolution strategy from the first individual's genome.
    model = population[0].genome

    es = EvolutionStrategy(model,
                           get_reward,
                           population_size=POPULATION_SIZE,
                           sigma=0.25,
                           learning_rate=0.03,
                           decay=0.998,
                           num_threads=2)
    es.run(5000, print_step=5, start=i)
    # Return the optimized weights found by the run.
    optimized = es.get_weights()
    return optimized
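The snippet above relies on get_reward and POPULATION_SIZE being defined elsewhere in that project. For reference, here is a minimal, self-contained sketch of the same evostra EvolutionStrategy API; the target vector and reward function are made up for illustration:

import numpy as np
from evostra import EvolutionStrategy

# Hypothetical optimization target: move the weights toward this vector.
target = np.array([1.0, -2.0, 0.5])

def get_reward(weights):
    # evostra passes the weights as a list of arrays; higher reward is better.
    return -float(np.sum((weights[0] - target) ** 2))

start_weights = [np.zeros(3)]
es = EvolutionStrategy(start_weights, get_reward,
                       population_size=20, sigma=0.1, learning_rate=0.03)
es.run(300, print_step=50)
print(es.get_weights())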
Example #2
def find_shapelets_es(timeseries, labels, max_len=100, min_len=1, population_size=100,
                      iterations=25, verbose=True, sigma=0.1, learning_rate=0.001):

    def cost(shapelet):
        # The first element of check_candidate's result is used as the fitness of this shapelet.
        return check_candidate(timeseries, labels, shapelet)[0]

    candidates = np.array(generate_candidates(timeseries, labels, max_len, min_len))

    # Seed the evolution strategy with a single randomly chosen candidate shapelet.
    es = EvolutionStrategy(candidates[np.random.choice(range(len(candidates)), size=population_size)][0][0],
                           cost, population_size=population_size, sigma=sigma, learning_rate=learning_rate)
    es.run(iterations, print_step=1)

    best_shapelet = es.get_weights()
    return best_shapelet
Example #3
import argparse
import pickle

parser = argparse.ArgumentParser(description='Evolution Strategies. ')
parser.add_argument('--env', default="Humanoid-v2")
parser.add_argument('--render', action='store_true')

args = parser.parse_args()

observationSpace, actionSpace = env_info(args.env)

# A feed-forward neural network with two hidden layers of sizes 32 and 16
model = FeedForwardNetwork(layer_sizes=[observationSpace, 32, 16, actionSpace])

get_reward = make_get_reward(args.env, model, args.render)
# If your task is computationally expensive, you can use num_threads > 1 to run it in multiple processes;
# with num_threads=-1, it will use all cores available on the machine. Here we use 1 process because the
# task is cheap and extra processes would only slow things down due to IPC overhead.
es = EvolutionStrategy(model.get_weights(),
                       get_reward,
                       population_size=20,
                       sigma=0.1,
                       learning_rate=0.03,
                       decay=0.995,
                       num_threads=1)
es.run(1000, print_step=100)
with open(args.env + ".pkl", 'wb') as fp:
    pickle.dump(es.get_weights(), fp)
#while True:
#   print(get_reward(es.get_weights(),True))
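The commented-out loop above hints at evaluating the trained policy; as in that line, get_reward's second argument presumably toggles rendering. A short sketch (not part of the original) of reloading the pickled weights and running one evaluation, reusing the model and get_reward defined above:

# Assumes the script above has already run and saved "<env>.pkl".
with open(args.env + ".pkl", 'rb') as fp:
    trained_weights = pickle.load(fp)

print(get_reward(trained_weights, True))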
Example #4
class Agent:

    AGENT_HISTORY_LENGTH = 1
    NUM_OF_ACTIONS = 2
    POPULATION_SIZE = 15
    EPS_AVG = 1
    SIGMA = 0.1
    LEARNING_RATE = 0.03
    INITIAL_EXPLORATION = 0.0
    FINAL_EXPLORATION = 0.0
    EXPLORATION_DEC_STEPS = 100000


    def __init__(self):
        self.model = Model()
        self.game = FlappyBird(pipe_gap=125)
        self.env = PLE(self.game, fps=30, display_screen=False)
        self.env.init()
        self.env.getGameState = self.game.getGameState
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward, self.POPULATION_SIZE, self.SIGMA, self.LEARNING_RATE)
        self.exploration = self.INITIAL_EXPLORATION


    def get_predicted_action(self, sequence):
        prediction = self.model.predict(np.array(sequence))
        x = np.argmax(prediction)
        return 119 if x == 1 else None


    def load(self, filename='weights.pkl'):
        with open(filename,'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()


    def get_observation(self):
        state = self.env.getGameState()
        # list() is needed on Python 3, where dict.values() is a view, not a sequence.
        return np.array(list(state.values()))
    

    def save(self, filename='weights.pkl'):
        with open(filename, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    
    def play(self, episodes):
        self.env.display_screen = True
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            self.env.reset_game()
            observation = self.get_observation()
            sequence = [observation]*self.AGENT_HISTORY_LENGTH
            done = False
            score = 0
            while not done:
                action = self.get_predicted_action(sequence)
                reward = self.env.act(action)
                observation = self.get_observation()
                sequence = sequence[1:]
                sequence.append(observation)
                done = self.env.game_over()
                if self.game.getScore() > score:
                    score = self.game.getScore()
                    print "score: %d" % score
        self.env.display_screen = False


    def train(self, iterations):
        self.es.run(iterations, print_step=1)


    def get_reward(self, weights):
        total_reward = 0.0
        self.model.set_weights(weights)

        for episode in range(self.EPS_AVG):
            self.env.reset_game()
            observation = self.get_observation()
            sequence = [observation]*self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                self.exploration = max(self.FINAL_EXPLORATION, self.exploration - self.INITIAL_EXPLORATION/self.EXPLORATION_DEC_STEPS)
                if random.random() < self.exploration:
                    action = random.choice([119, None])
                else:
                    action = self.get_predicted_action(sequence)
                reward = self.env.act(action)
                # Add a tiny random perturbation to break ties between equal rewards.
                reward += random.choice([0.0001, -0.0001])
                total_reward += reward
                observation = self.get_observation()
                sequence = sequence[1:]
                sequence.append(observation)
                done = self.env.game_over()

        return total_reward/self.EPS_AVG
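A minimal driver sketch (not part of the original snippet), assuming the Agent class above and its dependencies (PLE, FlappyBird, the Model class, numpy, pickle, random) are importable:

agent = Agent()
agent.train(100)            # run 100 ES iterations
agent.save('weights.pkl')   # persist the optimized weights
agent.play(3)               # watch 3 episodes with the trained policy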
Example #5
class Agent:
    AGENT_HISTORY_LENGTH = 1
    POPULATION_SIZE = 25
    EPS_AVG = 1
    SIGMA = 0.5
    LEARNING_RATE = 0.1
    INITIAL_EXPLORATION = 1.0
    FINAL_EXPLORATION = 0.0
    EXPLORATION_DEC_STEPS = 100000

    def __init__(self):
        self.env = gym.make('BipedalWalker-v2')
        self.model = Model()
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.POPULATION_SIZE, self.SIGMA,
                                    self.LEARNING_RATE)
        self.exploration = self.INITIAL_EXPLORATION

    def get_predicted_action(self, sequence):
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def load(self, filename='weights.pkl'):
        with open(filename, 'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()

    def save(self, filename='weights.pkl'):
        with open(filename, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def play(self, episodes, render=True):
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            total_reward = 0
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                if render:
                    self.env.render()
                action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
            print("total reward:", total_reward)

    def train(self, iterations):
        self.es.run(iterations, print_step=1)

    def get_reward(self, weights):
        total_reward = 0.0
        self.model.set_weights(weights)

        for episode in range(self.EPS_AVG):
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                self.exploration = max(
                    self.FINAL_EXPLORATION, self.exploration -
                    self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS)
                if random.random() < self.exploration:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)

        return total_reward / self.EPS_AVG
Example #6
class Agent:
    agent_hist = 1
    population = 50
    eps_avg = 1
    sigma = 0.2
    # Learning rate
    lr = 0.1
    init_explore = 0.9
    final_explore = 0.1
    explore_steps = 1E+5

    def __init__(self):
        # Initializes environment, Model, Algorithm and Exploration
        self.env = gym.make(GYM_ENV)
        self.model = Model()
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.population, self.sigma, self.lr)
        self.exploration = self.init_explore

    def get_predicted_action(self, sequence):
        # Retrieve the predicted action
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def load(self, filename='weights.pkl'):
        # Loads weights for agent_play
        with open(filename, 'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()

    def save(self, filename='weights.pkl'):
        # Saves the weights to a pickle file
        with open(filename, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def play(self, episodes, render=True):
        # Run the model in the OpenAI environment
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            total_reward = 0
            observation = self.env.reset()
            sequence = [observation] * self.agent_hist
            done = False
            while not done:
                if render:
                    self.env.render()
                action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
            print("Total reward:", total_reward)

    def train(self, iterations):
        # Begin training
        self.es.run(iterations, print_step=1)

    def get_reward(self, weights):
        # Initialize reward
        total_reward = 0.0
        self.model.set_weights(weights)

        # Calculate reward
        for episode in range(self.eps_avg):
            observation = self.env.reset()
            sequence = [observation] * self.agent_hist
            done = False
            while not done:
                self.exploration = max(
                    self.final_explore,
                    self.exploration - self.init_explore / self.explore_steps)
                if random.random() < self.exploration:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
        return total_reward / self.eps_avg
Example #7
class Agent:
    """The agent class."""

    ENV_ID = 'BipedalWalker-v2'
    # This is the number of history observations used in action prediction.
    AGENT_HISTORY_LENGTH = 1
    POPULATION_SIZE = 20
    EPS_AVG = 1
    SIGMA = 0.1
    LEARNING_RATE = 0.01
    # The following three parameters control the exploration probability.
    # It starts at INITIAL_EXPLORATION and decays to FINAL_EXPLORATION over
    # EXPLORATION_DEC_STEPS steps.
    INITIAL_EXPLORATION = 1.0
    FINAL_EXPLORATION = 0.0
    EXPLORATION_DEC_STEPS = 1000000

    def __init__(self):
        """Initialize the agent."""
        # Initialize the OpenAI Gym environment.
        self.env = gym.make(self.ENV_ID)

        # Uncomment the following lines if you want to record a video.
        # self.env = gym.wrappers.Monitor(self.env, "{}_monitor".format(self.ENV_ID),
        #     lambda episode_id: True, force=True)

        # Initialize the training model.
        self.model = Model()
        # Initialize evostra's evolution strategy.
        self.es = EvolutionStrategy(self.model.get_weights(), self.get_reward,
                                    self.POPULATION_SIZE, self.SIGMA,
                                    self.LEARNING_RATE)
        self.exploration = self.INITIAL_EXPLORATION
        self.exploration_dec = self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS

    def train(self, iterations=100, print_step=1, filename='weights.pkl'):
        """Train the model."""
        self.es.run(iterations, print_step=print_step)
        self.save(filename)

    def load(self, filename='weights.pkl'):
        """Load the model weights from file."""
        with open(filename, 'rb') as fp:
            self.model.set_weights(pickle.load(fp, encoding='bytes'))
        self.es.weights = self.model.get_weights()

    def save(self, filename='weights.pkl'):
        """Save the weights of current model into file."""
        with open(filename, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def play(self, episodes=1, render=True):
        """Play the agent for episodes."""
        self.model.set_weights(self.es.weights)

        for episode in range(episodes):
            total_reward = 0
            # Get the initial observation.
            observation = self.env.reset()
            # Fill the observation sequence with the initial observation
            # repeated AGENT_HISTORY_LENGTH times.
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                if render:
                    # Visualize.
                    self.env.render()
                action = self.get_predicted_action(sequence)
                # Get the results of the action.
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                # Shift the observation sequence to include the new one.
                sequence = sequence[1:]
                sequence.append(observation)

            print("total reward: ", total_reward)

    def get_predicted_action(self, sequence):
        """Get the model's predicted action based on sequence of states."""
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def get_reward(self, weights):
        """Get the reward of the current model based on EPS_AVG times of
        tests."""
        total_reward = 0.0
        self.model.set_weights(weights)

        # Run tests for EPS_AVG times.
        for episode in range(self.EPS_AVG):
            # Get the initial observation.
            observation = self.env.reset()
            # Fill the observation sequence with the initial observation
            # repeated AGENT_HISTORY_LENGTH times.
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                self.exploration = max(self.FINAL_EXPLORATION,
                                       self.exploration - self.exploration_dec)
                # Randomize exploration.
                if random.random() < self.exploration:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_predicted_action(sequence)
                # Get the results of the action.
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                # Shift the observation sequence to include the new one.
                sequence = sequence[1:]
                sequence.append(observation)

        return total_reward / self.EPS_AVG
Example #8
class Agent:
    def __init__(self,
                 model,
                 training_steps=500,
                 environment='BipedalWalker-v2',
                 AGENT_HISTORY_LENGTH=1,
                 POPULATION_SIZE=50,
                 EPS_AVG=1,
                 SIGMA=0.1,
                 LEARNING_RATE=0.01,
                 INITIAL_EXPLORATION=1.0,
                 FINAL_EXPLORATION=0.0,
                 EXPLORATION_DEC_STEPS=10000,
                 num_thread=1,
                 LR_mode=0):
        self.env = gym.make(environment)
        self.model = model
        self.exploration = INITIAL_EXPLORATION
        self.training_steps = training_steps
        self.AGENT_HISTORY_LENGTH = AGENT_HISTORY_LENGTH
        self.POPULATION_SIZE = POPULATION_SIZE
        self.EPS_AVG = EPS_AVG
        self.SIGMA = SIGMA
        self.LEARNING_RATE = LEARNING_RATE
        self.INITIAL_EXPLORATION = INITIAL_EXPLORATION
        self.FINAL_EXPLORATION = FINAL_EXPLORATION
        self.EXPLORATION_DEC_STEPS = EXPLORATION_DEC_STEPS
        self.num_thread = num_thread
        self.LR_mode = LR_mode
        self.es = EvolutionStrategy(self.model.get_weights(),
                                    self.get_reward,
                                    self.POPULATION_SIZE,
                                    self.SIGMA,
                                    self.LEARNING_RATE,
                                    num_threads=num_thread,
                                    LR_mode=self.LR_mode)

    def get_predicted_action(self, sequence):
        prediction = self.model.predict(np.array(sequence))
        return prediction

    def load(self, model_file):
        with open(model_file, 'rb') as fp:
            self.model.set_weights(pickle.load(fp))
        self.es.weights = self.model.get_weights()

    def save(self, model_file):
        with open(model_file, 'wb') as fp:
            pickle.dump(self.es.get_weights(), fp)

    def train(self, iterations):
        print('Training')
        self.es.run(iterations, print_step=1)
        optimized_weights = self.es.get_weights()
        self.model.set_weights(optimized_weights)

    def play(self, episodes, render=True):
        self.model.set_weights(self.es.weights)
        for episode in range(episodes):
            print('On episode number {}'.format(episode))
            total_reward = 0
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                if render:
                    self.env.render()
                action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
            print("total reward:", total_reward)

    def get_reward(self, weights):
        total_reward = 0.0
        self.model.set_weights(weights)

        for episode in range(self.EPS_AVG):
            start_time = time.time()
            observation = self.env.reset()
            sequence = [observation] * self.AGENT_HISTORY_LENGTH
            done = False
            while not done:
                self.exploration = max(
                    self.FINAL_EXPLORATION, self.exploration -
                    self.INITIAL_EXPLORATION / self.EXPLORATION_DEC_STEPS)
                if random.random() < self.exploration:
                    action = self.env.action_space.sample()
                else:
                    action = self.get_predicted_action(sequence)
                observation, reward, done, _ = self.env.step(action)
                total_reward += reward
                sequence = sequence[1:]
                sequence.append(observation)
        #print("total reward: ", total_reward)
        #print('Finished in {} seconds'.format(time.time() - start_time))
        return total_reward / self.EPS_AVG
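A minimal driver sketch (not in the original), assuming a Model class exposing get_weights/set_weights/predict is available; LR_mode and num_thread belong to the modified EvolutionStrategy this snippet uses:

model = Model()   # hypothetical model with get_weights/set_weights/predict
agent = Agent(model,
              training_steps=500,
              environment='BipedalWalker-v2',
              POPULATION_SIZE=50,
              num_thread=1)
agent.train(200)                  # ES iterations
agent.save('walker_weights.pkl')  # persist the optimized weights
agent.play(3)                     # render a few evaluation episodes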