Example #1
import time

import numpy as np


def run_benchmark(iterations, env_params):
    env = Herding(**env_params)
    dogs_count = env.env_data.config.dogs_count
    env.reset()
    start_time = time.time()
    for _ in range(iterations):
        # Step the environment with random float32 actions, one row per dog.
        env.step(np.random.rand(dogs_count, 3).astype(np.float32))
    end_time = time.time()
    env.close()
    elapsed = end_time - start_time
    print(f"{int(iterations / elapsed)} iterations / s")
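A minimal invocation sketch: env_params is forwarded to Herding(**env_params), so any keyword the environment accepts works here. The names mirror the dogs_count/sheep_count keywords used in Example #2; the values are illustrative.

run_benchmark(
    iterations=10_000,
    env_params={'dogs_count': 2, 'sheep_count': 50},
)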
Example #2
from typing import Optional


def run_benchmarks(params: BenchmarkParams = BenchmarkParams(),
                   log_dir: Optional[str] = None,
                   show_plot: bool = False):
    dogs_counts = list(
        range(params.dogs_min, params.dogs_max, params.dogs_step))
    sheep_counts = list(
        range(params.sheep_min, params.sheep_max, params.sheep_step))
    actions = _get_actions(dogs_counts)
    results = np.zeros((params.dogs_max, params.sheep_max))

    # Time a fixed number of random steps for every (dogs, sheep) combination.
    for dogs_count in dogs_counts:
        for sheep_count in sheep_counts:
            env = Herding(dogs_count=dogs_count, sheep_count=sheep_count)
            env.reset()
            start_time = time.time()
            for _ in range(params.benchmark_iterations):
                env.step(np.random.rand(dogs_count, 3))
            end_time = time.time()
            env.close()
            results[dogs_count, sheep_count] = end_time - start_time

    if log_dir is not None:
        _write_log(results, log_dir)
    if show_plot:
        _show_plot(dogs_counts, sheep_counts, results)
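BenchmarkParams is not defined in this snippet. Judging only by the attributes the function reads, it could be a simple dataclass along these lines; the field names come from the usage above, while the default values are placeholders, not the project's real settings.

from dataclasses import dataclass


@dataclass
class BenchmarkParams:
    # Sweep ranges for the grid of environment sizes
    # (min inclusive, max exclusive, as with range()).
    dogs_min: int = 1
    dogs_max: int = 5
    dogs_step: int = 1
    sheep_min: int = 10
    sheep_max: int = 100
    sheep_step: int = 10
    # Number of timed env.step() calls per configuration.
    benchmark_iterations: int = 1000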
Example #3
        "policy_mapping_fn": lambda agent_id: "policy",
    },
    "horizon": 2000,
    "num_gpus": 1,
    "explore": False
    #"replay_sequence_length": 5,
    #"num_workers": 4,
    #"num_envs_per_worker": 2,
}

ray.init(local_mode=True)

checkpoint_number = 790

env = Herding({"sheep_count": 3
               #"agents_layout": "simple"
               })
agent = ppo.PPOTrainer(config=config, env=HerdingEnvWrapper)
agent.restore(
    rf"C:\Users\Mateusz\ray_results\Herding\Herding\checkpoint_{checkpoint_number}\checkpoint-{checkpoint_number}"
)

while True:
    episode_reward = 0
    done = False
    steps = 0
    obs = env.reset()
    while (not done) and (steps != 300):
        action = agent.compute_action(obs[0], policy_id="policy")
        obs, reward, done, info = env.step(np.array([[2, action]]))
        env.render()
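The config dict in this example is cut off at the top: the policy_mapping_fn line closes RLlib's multiagent sub-config. A sketch of how the missing head of such a config commonly looks in the legacy ray.rllib.agents.ppo API; the policy specification here is an assumption, not the author's actual settings, and whether None observation/action spaces are inferred from the env depends on the Ray version.

config = {
    "multiagent": {
        # One shared policy for all agents; the (policy_cls, obs_space,
        # act_space, config) tuple uses None to fall back to defaults.
        "policies": {"policy": (None, None, None, {})},
        "policy_mapping_fn": lambda agent_id: "policy",
    },
    "horizon": 2000,
    "num_gpus": 1,
    "explore": False,
}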
Example #4
def play(my_env=None):
    # Use the provided environment, or fall back to a default Herding instance.
    env = my_env or Herding()
    manual_steering = ManualSteering(env)
    manual_steering.run_env()
Example #5
                episode_reward = 0

        self.env.close()

    @staticmethod
    def print_debug(*args):
        # Rewrite the current console line in place with the given values,
        # separated by tabs.
        print('\r', end='', flush=True)
        for arg in args:
            print(str(arg) + '\t', end='', flush=True)


def play(my_env=None):
    env = my_env or Herding()
    manual_steering = ManualSteering(env)
    manual_steering.run_env()


if __name__ == '__main__':
    args = {'sheep_count': 80, 'dogs_count': 1}
    if len(sys.argv) > 1:
        # Expose every default configuration key as an optional CLI flag.
        config = get_default_configuration()
        parser = argparse.ArgumentParser()
        for name, value in config.items():
            parser.add_argument('--' + name, type=type(value))
        # Override the defaults with whatever flags were actually passed.
        for name, value in vars(parser.parse_args()).items():
            if value is not None:
                args[name] = value

    play(Herding(args))
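With the argparse block above, every key of get_default_configuration() becomes an optional --<name> flag. A hypothetical invocation (the script name and values are made up, and each flag exists only if the matching key is in the default configuration):

# python play.py --sheep_count 120 --dogs_count 2

Note that deriving each option's type with type(value) is a known argparse pitfall for booleans: type=bool turns any non-empty string, including "False", into True.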
Example #6
def play(my_env=None):
    env = my_env if my_env is not None else Herding()

    manual_steering = ManualSteering(env)
    manual_steering.run_env()