'snake_dis', 'food_dis', 'snake_perc', 'food_perc', 'snake_50',
        'snake_100'
    ]

    for name, value in zip(features_index, features):
        new_features[name] = value

    return new_features


if __name__ == '__main__':

    # Create customized and processed slither env
    #universe.configure_logging(False)

    env = create_slither_env('features')
    env = Unvectorize(env)

    env.configure(fps=5.0,
                  remotes=1,
                  start_timeout=15 * 60,
                  vnc_driver='go',
                  vnc_kwargs={
                      'encoding': 'tight',
                      'compress_level': 0,
                      'fine_quality_level': 50
                  })

    observation_n = env.reset()

    ## init the Q-learning agent
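
    # Sketch, not from the original source: one way the agent loop might
    # continue from here, assuming this script imports ApproximateQAgent as
    # the later example does. The getAction/update method names are
    # assumptions for illustration only.
    agent = ApproximateQAgent()
    done = False
    while not done:
        # universe may return None observations while the VNC session boots
        if observation_n is None:
            observation_n, _, done, _ = env.step(env.action_space.sample())
            continue
        action = agent.getAction(observation_n)
        new_observation, reward, done, info = env.step(action)
        agent.update(observation_n, action, new_observation, reward)
        observation_n = new_observation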
Example #2
  FLAGS.grad_clip    = True

  FLAGS.check_every  = FLAGS.train_steps // 10
  FLAGS.log_every    = 500
  FLAGS.learn_every  = 1

  FLAGS.gamma        = 0.99
  FLAGS.lr_end       = 0.00005
  FLAGS.lr_nsteps    = FLAGS.train_steps // 2
  FLAGS.eps_end      = 0.1
  FLAGS.eps_nsteps   = FLAGS.train_steps // 5

  FLAGS.fps          = 5
  FLAGS.state_hist   = 4

  env = create_slither_env(FLAGS.state_type)

  FLAGS.state_size   = env.state_size
  FLAGS.high_val     = env.high_val
  FLAGS.num_actions  = env.action_space.n

  env = Unvectorize(env)
  env.configure(fps=FLAGS.fps, remotes=FLAGS.remotes, start_timeout=15 * 60, vnc_driver='go', vnc_kwargs={'encoding': 'tight', 'compress_level': 0, 'fine_quality_level': 50})

  # Make recording env
  record_env = None
  if FLAGS.record:
    record_env = create_slither_env(FLAGS.state_type)
    record_env = Unvectorize(record_env)
    record_env.configure(fps=30, remotes=1, start_timeout=15 * 60, vnc_driver='go', vnc_kwargs={'encoding': 'tight', 'compress_level': 0, 'fine_quality_level': 50})
    record_env = gym.wrappers.Monitor(record_env, FLAGS.record_path, video_callable=lambda x: True, resume=True)
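    # gym.wrappers.Monitor invokes video_callable with the episode index and
    # records whenever it returns True, so the lambda above records every
    # episode; resume=True lets the monitor write into an existing, non-empty
    # FLAGS.record_path instead of raising an error.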
Example #3
import utils.utils as utils

from agent import ApproximateQAgent
from utils.env import create_slither_env
from universe.wrappers import Unvectorize

# center of the frame
center_x = 270
center_y = 235
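
# Hypothetical helper, not part of the original source: slither.io keeps the
# snake's head near the centre of the frame, so a target pixel (e.g. a food
# blob) can be turned into a pointing angle relative to (center_x, center_y).
import math

def angle_to(target_x, target_y):
    """Angle in radians (image coordinates) from the frame centre to the target."""
    return math.atan2(target_y - center_y, target_x - center_x)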

if __name__ == '__main__':

    # Create customized and processed slither env
    #universe.configure_logging(False)

    env = create_slither_env('shapes')
    env = Unvectorize(env)
    env.configure(fps=20.0,
                  remotes=1,
                  start_timeout=15 * 60,
                  vnc_driver='go',
                  vnc_kwargs={
                      'encoding': 'tight',
                      'compress_level': 0,
                      'fine_quality_level': 50
                  })

    observation_n = env.reset()

    ## init the Q-learning agent
    # read in stored weights from previous games with pickle
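    # Sketch, not from the original source: the pickle load described by the
    # comment above. The ApproximateQAgent constructor, its setWeights method
    # and the 'weights.pkl' path are illustrative assumptions.
    import pickle

    agent = ApproximateQAgent()
    try:
        with open('weights.pkl', 'rb') as f:
            agent.setWeights(pickle.load(f))
    except (IOError, EOFError):
        pass  # no stored weights yet -- start fresh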
Example #4
    def train(self, exp_schedule, lr_schedule):
        # Initialize replay buffer and variables
        replay_buffer = ReplayBuffer(self.FLAGS.buffer_size,
                                     self.FLAGS.state_hist)
        rewards = deque(maxlen=self.FLAGS.num_test)
        max_q_values = deque(maxlen=1000)
        q_values = deque(maxlen=1000)
        self.init_averages()

        t = 0  # global training step counter
        loss_eval = grad_eval = 0
        scores_eval = []  # evaluation scores collected over training
        scores_eval += [self.evaluate(self.env, self.FLAGS.num_test)]

        self.prog = Progbar(target=self.FLAGS.train_steps)

        # Train for the requested number of train steps; continual_crash
        # counts consecutive env crashes and is reset after a clean episode.
        continual_crash = 0
        while t < self.FLAGS.train_steps:
            try:
                total_reward = 0
                ep_len = 0
                state = self.env.reset()

                # Run for 1 episode and update the buffer
                while True:
                    ep_len += 1

                    # store the raw frame in the replay buffer and build the
                    # stacked recent-frame input for the Q network
                    idx = replay_buffer.store_frame(state)
                    q_input = replay_buffer.encode_recent_observation()

                    # choose action according to current Q and exploration
                    best_action, q_vals = self.network.get_best_action(
                        q_input)
                    action = exp_schedule.get_action(best_action)

                    # store q values; the network output is named q_vals so
                    # it does not shadow the q_values deque defined above
                    max_q_values.append(max(q_vals))
                    q_values += list(q_vals)

                    # perform action in env
                    new_state, reward, done, info = self.env.step(action)

                    # store the transition
                    replay_buffer.store_effect(idx, action, reward, done)
                    state = new_state

                    # Count reward
                    total_reward += reward

                    # Stop at end of episode
                    if done: break

                # Store episodic rewards
                if ep_len > 1: rewards.append(total_reward)

                # Learn using replay
                while True:
                    t += 1
                    ep_len -= 1

                    # Perform a training step when scheduled
                    if ((t > self.FLAGS.learn_start)
                            and (t % self.FLAGS.learn_every == 0)):
                        loss_eval, grad_eval = self.network.update_step(
                            t, replay_buffer, lr_schedule.epsilon,
                            self.summary)
                        exp_schedule.update(t)
                        lr_schedule.update(t)

                    if (t % self.FLAGS.target_every == 0):
                        self.network.update_target_params()

                    # Update logs if necessary
                    if ((t > self.FLAGS.learn_start)
                            and (t % self.FLAGS.log_every == 0)
                            and (len(rewards) > 0)):
                        self.update_averages(rewards, max_q_values, q_values,
                                             scores_eval)
                        self.update_logs(t, loss_eval, rewards,
                                         exp_schedule.epsilon, grad_eval,
                                         lr_schedule.epsilon)

                    # Otherwise report progress while the replay buffer is
                    # still being populated
                    elif (t < self.FLAGS.learn_start) and (
                            t % self.FLAGS.log_every == 0):
                        sys.stdout.write(
                            "\rPopulating the memory {}/{}...".format(
                                t, self.FLAGS.learn_start))
                        sys.stdout.flush()

                    if ((t > self.FLAGS.learn_start)
                            and (t % self.FLAGS.check_every == 0)):
                        # Evaluate current model
                        scores_eval += [
                            self.evaluate(self.env, self.FLAGS.num_test)
                        ]

                        # Save current Model
                        self.network.save()

                        # Record video of current model
                        if self.FLAGS.record:
                            self.record()

                    if ep_len <= 0 or t >= self.FLAGS.train_steps: break
                continual_crash = 0

            except Exception as e:
                continual_crash += 1
                self.logger.info(e)
                if continual_crash >= 10:
                    self.logger.info("Crashed 10 times -- stopping u suck")
                    raise e
                else:
                    t -= 1
                    self.logger.info("Env crash, making new env")
                    time.sleep(60)
                    self.env = create_slither_env(self.FLAGS.state_type)
                    self.env = Unvectorize(self.env)
                    self.env.configure(fps=self.FLAGS.fps,
                                       remotes=self.FLAGS.remotes,
                                       start_timeout=15 * 60,
                                       vnc_driver='go',
                                       vnc_kwargs={
                                           'encoding': 'tight',
                                           'compress_level': 0,
                                           'fine_quality_level': 50
                                       })
                    time.sleep(60)

        # End of training
        self.logger.info("- Training done.")
        self.network.save()
        scores_eval += [self.evaluate(self.env, self.FLAGS.num_test)]
        export_plot(scores_eval, "Scores", self.FLAGS.plot_path)
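
        # Sketch, not from the original source: how train() is typically
        # driven. The schedule class names and FLAGS.lr_begin below are
        # assumptions; the only interface train() actually relies on is that
        # exp_schedule provides get_action/update/epsilon and lr_schedule
        # provides update/epsilon.
        #
        #   exp_schedule = LinearExploration(env, 1.0, FLAGS.eps_end,
        #                                    FLAGS.eps_nsteps)
        #   lr_schedule = LinearSchedule(FLAGS.lr_begin, FLAGS.lr_end,
        #                                FLAGS.lr_nsteps)
        #   trainer.train(exp_schedule, lr_schedule)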