def main():
    from experience import Experience
    from visualization import hist_classes, scatter_classes

    class_count_list = []
    agents = [
        'RainbowAgent',
        'SimpleAgent',
        'SecondAgent',
        'ProbabilisticAgent'
    ]
    for agent in agents:
        exp = Experience(agent, load=True)
        labels, _, examples, _ = exp.load()
        class_count, _ = divide_and_count(examples, labels)
        class_count_list.append(class_count)
    scatter_classes(class_count_list, agents)
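# NOTE: hedged sketch, not part of the original file. divide_and_count is
# called in main() above but is not defined or imported in this section; the
# assumption here is that it tallies replay examples per class label and also
# returns the examples grouped by label. The real helper may differ.
def divide_and_count(examples, labels):
    class_count = {}
    class_examples = {}
    for example, label in zip(examples, labels):
        # Count one example for its label and keep the example grouped by label.
        class_count[label] = class_count.get(label, 0) + 1
        class_examples.setdefault(label, []).append(example)
    return class_count, class_examples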
import os
import cPickle

import tensorflow as tf
import tensorflow.contrib.slim as slim

from experience import Experience


class Game(object):

    def __init__(self, env=None, agent=None, logdir=None, should_render=None, should_load=None):
        self.env = env
        self.agent = agent
        self.config = self.agent.config
        self.logdir = logdir
        self.should_render = should_render
        self.experience = Experience(self.config)
        if should_load:
            self.load()
        else:
            self.step = 0
            self.epsilon = 0.3
            self.train_rewards = [0] * 100
            self.current_episode = 0

    def save(self):
        # Persist training progress alongside the replay experience.
        with open(os.path.join(self.logdir, "variables.save"), "wb") as w:
            save_list = [self.step, self.epsilon, self.train_rewards, self.current_episode]
            cPickle.dump(save_list, w, protocol=cPickle.HIGHEST_PROTOCOL)
        self.experience.save(self.logdir)

    def load(self):
        # Restore training progress and the replay experience from logdir.
        with open(os.path.join(self.logdir, "variables.save"), "rb") as f:
            self.step, self.epsilon, self.train_rewards, self.current_episode = cPickle.load(f)
        self.experience.load(self.logdir)

    def runEpisode(self, sess, is_training=True):
        # Reset environment.
        state = self.env.reset()

        # Initialize stats.
        num_steps = 0
        total_reward = 0.
        total_loss = 0.
        old_step = self.step
        self.epsilon = max(self.epsilon * 0.99, 0.1) if is_training else self.config.TEST_EPSILON

        for t in xrange(self.config.NUM_STEPS):
            # Render environment.
            if self.should_render:
                self.env.render()

            # Agent plans according to the current action values.
            queried_state = self.experience.getLastestState()
            action = self.agent.plan(sess, queried_state, self.env.action_space.sample(), self.epsilon)

            # Environment executes the action.
            next_state, reward, done, info = self.env.step(action)
            total_reward += reward

            # Add observation to experience.
            self.experience.add(state, action, next_state, reward, 1 if done else 0)

            # If training, agent updates parameters according to the observation.
            if is_training and self.step > self.config.START_LEARN:
                # Update parameters of the current training network.
                if self.step % self.config.TRAIN_FREQUENCY == 0:
                    (batch_states, batch_actions, batch_next_states,
                     batch_rewards, batch_is_terminal) = self.experience.sample()
                    total_loss += self.agent.observe(
                        sess, batch_states, batch_actions, batch_next_states,
                        batch_rewards, batch_is_terminal)

                # Update parameters of the target network.
                if self.step % self.config.UPDATE_TARGET_FREQUENCY == 0:
                    self.agent.update_target(sess)

            # End of episode.
            if done:
                break

            # Update state.
            state = next_state

            # Increment time.
            self.step += 1

        # Message at the end of the episode.
        num_steps = self.step - old_step + 1
        return num_steps, total_reward, total_loss

    def train(self, num_episodes):
        global_step = slim.get_or_create_global_step()
        sv = tf.train.Supervisor(logdir=self.logdir, save_model_secs=20, global_step=global_step)
        with sv.managed_session("") as sess:
            for i in xrange(num_episodes):
                ep_num_steps, ep_reward, ep_loss = self.runEpisode(sess, is_training=True)
                self.train_rewards[self.current_episode % 100] = ep_reward
                if self.current_episode % 20 == 0:
                    self.save()
                self.current_episode += 1
                print "Episode", self.current_episode, "has finished in", ep_num_steps, "steps"
                print "  Reward: {:10}  Loss: {:.6f}  Epsilon: {:.3f}".format(ep_reward, ep_loss, self.epsilon)
                print "Running average reward for the last 100 episodes:", \
                    sum(self.train_rewards) / min(100, self.current_episode)
            sv.saver.save(sess, self.logdir, global_step=sv.global_step)
            self.save()

    def play(self, num_episodes):
        sv = tf.train.Supervisor(logdir=self.logdir)
        with sv.managed_session("") as sess:
            total_reward = 0.
            self.experience.reset()
            for i in xrange(num_episodes):
                # runEpisode returns (num_steps, reward, loss); the loss is unused when playing.
                ep_num_steps, ep_reward, _ = self.runEpisode(sess, is_training=False)
                total_reward += ep_reward
                print "Episode", i, "has finished in", ep_num_steps, "steps"
                print "  Reward: {:10}".format(ep_reward)
                print "Running average reward:", total_reward / (i + 1)
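# Hedged usage sketch (not part of the original file): one way Game might be
# wired up with an OpenAI Gym environment and one of the project's agents.
# The agents module path, the SimpleAgent constructor arguments, the CartPole
# environment, and the log directory are all assumptions for illustration.
#
# import gym
# from agents import SimpleAgent          # assumed module path
#
# env = gym.make("CartPole-v0")           # assumed environment
# agent = SimpleAgent()                   # assumed constructor
# game = Game(env=env, agent=agent, logdir="/tmp/game_logs",
#             should_render=False, should_load=False)
# game.train(num_episodes=1000)           # checkpoints every 20 episodes
# game.play(num_episodes=10)              # evaluates with config.TEST_EPSILON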