import time

import numpy as np


def run_benchmark(iterations, env_params):
    # Build the environment from the given parameters and time `iterations`
    # random steps to estimate raw simulation throughput.
    env = Herding(**env_params)
    dogs_count = env.env_data.config.dogs_count
    env.reset()
    start_time = time.time()
    for _ in range(iterations):
        env.step(np.random.rand(dogs_count, 3).astype(np.float32))
    end_time = time.time()
    env.close()
    result = end_time - start_time
    print(f"{int(iterations / result)} iterations / s")
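# Example call (hedged): the env_params keys below mirror the keyword
# arguments used in run_benchmarks and are assumptions about Herding's
# constructor, not a documented API:
#
#     run_benchmark(10_000, {'dogs_count': 2, 'sheep_count': 50})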
def run_benchmarks(params: BenchmarkParams = BenchmarkParams(),
                   log_dir: str = None, show_plot=False):
    # Sweep every (dogs_count, sheep_count) pair and record how long
    # `benchmark_iterations` random steps take for each combination.
    dogs_counts = list(range(params.dogs_min, params.dogs_max, params.dogs_step))
    sheep_counts = list(range(params.sheep_min, params.sheep_max, params.sheep_step))
    results = np.zeros((params.dogs_max, params.sheep_max))
    for dogs_count in dogs_counts:
        for sheep_count in sheep_counts:
            env = Herding(dogs_count=dogs_count, sheep_count=sheep_count)
            env.reset()
            start_time = time.time()
            for _ in range(params.benchmark_iterations):
                env.step(np.random.rand(dogs_count, 3))
            end_time = time.time()
            env.close()
            results[dogs_count, sheep_count] = end_time - start_time
    if log_dir is not None:
        _write_log(results, log_dir)
    if show_plot:
        _show_plot(dogs_counts, sheep_counts, results)
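# A minimal driver sketch, assuming BenchmarkParams is a plain mutable
# parameter object whose field names match the attributes read above;
# the concrete values here are illustrative only.
if __name__ == '__main__':
    params = BenchmarkParams()
    params.dogs_min, params.dogs_max, params.dogs_step = 1, 5, 1
    params.sheep_min, params.sheep_max, params.sheep_step = 10, 110, 25
    params.benchmark_iterations = 1_000
    run_benchmarks(params, show_plot=True)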
"policy_mapping_fn": lambda agent_id: "policy", }, "horizon": 2000, "num_gpus": 1, "explore": False #"replay_sequence_length": 5, #"num_workers": 4, #"num_envs_per_worker": 2, } ray.init(local_mode=True) checkpoint_number = 790 env = Herding({"sheep_count": 3 #"agents_layout": "simple" }) agent = ppo.PPOTrainer(config=config, env=HerdingEnvWrapper) agent.restore( rf"C:\Users\Mateusz\ray_results\Herding\Herding\checkpoint_{checkpoint_number}\checkpoint-{checkpoint_number}" ) while True: episode_reward = 0 done = False steps = 0 obs = env.reset() while (not done) and (steps != 300): action = agent.compute_action(obs[0], policy_id="policy") obs, reward, done, info = env.step(np.array([[2, action]])) env.render()
        episode_reward = 0
        self.env.close()

    @staticmethod
    def print_debug(*args):
        # Rewrite the current console line with the given values,
        # tab-separated (useful for live per-step debug output).
        print('\r', end='', flush=True)
        for arg in args:
            print(str(arg) + '\t', end='', flush=True)


def play(my_env=None):
    env = my_env or Herding()
    manual_steering = ManualSteering(env)
    manual_steering.run_env()


if __name__ == '__main__':
    args = {'sheep_count': 80, 'dogs_count': 1}
    if len(sys.argv) > 1:
        # Expose every default configuration key as a --flag of the same
        # name and type, and override the defaults with any value given.
        config = get_default_configuration()
        parser = argparse.ArgumentParser()
        for name, value in config.items():
            parser.add_argument('--' + name, type=type(value))
        for name, value in vars(parser.parse_args()).items():
            if value is not None:
                args[name] = value
    # Run with the defaults when no flags are given; assumes Herding takes
    # the configuration as keyword arguments, as in run_benchmarks.
    play(Herding(**args))
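# Hedged CLI sketch: the available flags mirror the keys returned by
# get_default_configuration(); sheep_count and dogs_count are the only
# names confirmed by this file, and the script name `play.py` is assumed.
#
#     python play.py                                  # defaults: 80 sheep, 1 dog
#     python play.py --sheep_count 40 --dogs_count 2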