Example 1
import os
import tempfile
import zipfile

import dill
import baselines.common.tf_util as U  # TF checkpoint helpers (save_state)


def save(self, path):
    """Save model to a pickle located at `path`."""
    # Checkpoint the TensorFlow variables into a temporary directory,
    # zip the checkpoint files, then pickle the archive bytes together
    # with the parameters needed to rebuild the act function.
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:  # don't zip the archive into itself
                        zipf.write(file_path,
                                   os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        dill.dump((model_data, self._act_params), f)
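
For completeness, a matching loader would reverse these steps: unpickle the
(archive bytes, act params) tuple, write the archive back to disk, unzip it
into a temporary directory, and restore the TensorFlow variables. A minimal
sketch, assuming the same `U` helpers (`load_state`) and an active TensorFlow
session; the function name and return value are illustrative, not the
library's exact API:

    def load(path):
        """Rebuild saved state from a file produced by `save` (sketch)."""
        with open(path, "rb") as f:
            model_data, act_params = dill.load(f)  # zip bytes + act params
        with tempfile.TemporaryDirectory() as td:
            arc_path = os.path.join(td, "packed.zip")
            with open(arc_path, "wb") as f:
                f.write(model_data)  # materialize the archive on disk
            zipfile.ZipFile(arc_path, 'r').extractall(td)  # unpack checkpoint
            U.load_state(os.path.join(td, "model"))  # restore TF variables
        # The caller can rebuild the act function from act_params
        return act_params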
Example 2
    def evaluate(step, episode_number):
        """Run deterministic evaluation episodes and log the mean reward."""
        global max_eval_reward_mean, model_saved
        print('Evaluate...')
        eval_reward_sum = 0.0
        # Run evaluation episodes
        for eval_episode in range(n_eval_episodes):
            obs = env.reset()
            done = False
            while not done:
                # Greedy (deterministic) discrete action indices from the policy
                action_idxes = np.array(
                    act(np.array(obs)[None], stochastic=False))
                # Map each discrete index back onto the uniform action grid:
                # index / grains lies in [0, 1]; stretch to [low, high]
                actions_greedy = (action_idxes / num_action_grains
                                  * actions_range + low)

                if eval_std == 0.0:
                    action = actions_greedy
                else:
                    # Perturb each action dimension with Gaussian noise,
                    # resampling until the noisy action discretizes to a
                    # valid grid index (rejection sampling).
                    action = []
                    for index, a_greedy in enumerate(actions_greedy):
                        out_of_range_action = True
                        while out_of_range_action:
                            a_stoch = np.random.normal(loc=a_greedy,
                                                       scale=eval_std)
                            a_idx_stoch = np.rint(
                                (a_stoch + high[index]) /
                                actions_range[index] * num_action_grains)
                            if 0 <= a_idx_stoch < num_actions_pad:
                                action.append(a_stoch)
                                out_of_range_action = False

                # Step the environment and accumulate reward
                obs, rew, done, _ = env.step(action)
                eval_reward_sum += rew

        # Average the rewards and log
        eval_reward_mean = eval_reward_sum / n_eval_episodes
        print('Mean evaluation reward:', eval_reward_mean, 'over',
              n_eval_episodes, 'episodes')

        # Append this evaluation's result to a tab-separated CSV
        with open(
                "{}/{}_{}_{}_eval.csv".format(results_dir, method_name,
                                              time_stamp, env_name),
                "a") as eval_fw:
            eval_writer = csv.writer(
                eval_fw,
                delimiter="\t",
                lineterminator="\n",
            )
            eval_writer.writerow([episode_number, step, eval_reward_mean])
        # Also log the mean reward to TensorBoard via a summary op
        summary = sess.run(eval_summary,
                           feed_dict={eval_placeholder: eval_reward_mean})
        log_writer.add_summary(summary, episode_number)
        log_writer.flush()

        # Checkpoint whenever the evaluation mean improves on the best so far
        if max_eval_reward_mean is None or eval_reward_mean > max_eval_reward_mean:
            logger.log(
                "Saving model due to mean eval increase: {} -> {}".format(
                    max_eval_reward_mean, eval_reward_mean))
            U.save_state(model_file)
            model_saved = True
            max_eval_reward_mean = eval_reward_mean
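
The index arithmetic in `evaluate` is easier to see in isolation: actions are
discretized onto a uniform grid, and indices are mapped back by the inverse
affine transform. Note the snippet adds `high[index]` where one would expect
to subtract `low[index]`; the two coincide when the action space is symmetric
(low = -high), which this code appears to assume. A self-contained round-trip
with illustrative bounds (`low`, `high`, and `num_action_grains` here are made
up for the demo):

    import numpy as np

    # Illustrative discretization parameters (not taken from the example)
    low = np.array([-1.0, -2.0])   # per-dimension lower bounds
    high = np.array([1.0, 2.0])    # per-dimension upper bounds
    num_action_grains = 16         # grid sub-intervals per dimension
    actions_range = high - low

    # Continuous action -> nearest grid index (one per dimension)
    a = np.array([0.3, -1.1])
    idx = np.rint((a - low) / actions_range * num_action_grains)

    # Grid index -> continuous action on the uniform grid
    a_back = idx / num_action_grains * actions_range + low

    print(idx)     # [10.  4.]
    print(a_back)  # [ 0.25 -1.  ]  -- within half a grid cell of `a`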