import os

from gym.spaces import Discrete, Tuple
from ray.rllib.env.multi_agent_env import MultiAgentEnv

from deep_logistics.environment import Environment
from deep_logistics.statistics import Statistics  # assumed module path


class DeepLogisticBase(MultiAgentEnv):

    def __init__(self, height, width, ai_count, agent_count, agent, ups,
                 delivery_points, state, render_screen=False):
        self.render_screen = render_screen
        os.environ["MKL_NUM_THREADS"] = "1"
        self.env = Environment(
            height=height,
            width=width,
            depth=3,
            agents=ai_count,
            agent_class=agent,
            draw_screen=self.render_screen,
            tile_height=32,
            tile_width=32,
            # scheduler=RandomScheduler,
            ups=ups,
            ticks_per_second=1,
            spawn_interval=1,  # In steps
            task_generate_interval=1,  # In steps
            task_assign_interval=1,  # In steps
            delivery_points=delivery_points)
        self.statistics = Statistics()

        # There must be more agents in total than AI-controlled ones.
        assert ai_count < agent_count

        self.state_representation = state(self.env)
        # NOTE: this stores a generated state sample; the other environments in
        # this repo wrap the sample's shape in a Box space instead.
        self.observation_space = self.state_representation.generate(
            self.env.agents[0])
        self.action_space = Discrete(self.env.action_space.N_ACTIONS)

        self.grouping = {'group_1': ["agent_%s" % x for x in range(ai_count)]}
        self.agents = {
            k: self.env.agents[i]
            for i, k in enumerate(self.grouping["group_1"])
        }

        obs_space = Tuple([self.observation_space for _ in range(ai_count)])
        act_space = Tuple([self.action_space for _ in range(ai_count)])
        # self.with_agent_groups(groups=self.grouping,
        #                        obs_space=obs_space,
        #                        act_space=act_space)

        # Spawn all agents and assign the initial tasks.
        self.env.deploy_agents()
        self.env.task_assignment()
        self.episode = 0

    def get_agents(self):
        return self.env.agents
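# Usage sketch (not from the source): one way DeepLogisticBase might be
# instantiated. `AIAgent` and `State0` are stand-ins borrowed from the other
# modules in this repo; the concrete argument values are assumptions.
if __name__ == "__main__":
    base_env = DeepLogisticBase(height=10,
                                width=10,
                                ai_count=1,
                                agent_count=2,
                                agent=AIAgent,
                                ups=None,
                                delivery_points=[(7, 2), (2, 2), (2, 7), (7, 7)],
                                state=State0,
                                render_screen=False)
    print(base_env.get_agents())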
import gym
import numpy as np

from deep_logistics.environment import Environment
# Project-local imports; module paths assumed:
from deep_logistics.scheduler import OnDemandScheduler
from deep_logistics.spawn_strategy import LocationSpawnStrategy
from state_representations import StateFull
from rewards import Reward0


class DeepLogisticsNormal(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.env = Environment(height=5,
                               width=5,
                               depth=3,
                               ups=None,
                               ticks_per_second=1,
                               taxi_n=1,
                               taxi_agent=None,
                               taxi_respawn=False,
                               taxi_control="constant",
                               scheduler=OnDemandScheduler,
                               delivery_locations=None,
                               spawn_strategy=LocationSpawnStrategy,
                               graphics_render=True,
                               graphics_tile_height=16,
                               graphics_tile_width=16)
        self.frame_skip = 4
        self.agent = self.env.get_agent(0)
        self.sgen = StateFull(self.env)
        self._seed = 0

        self.action_space = gym.spaces.Discrete(self.env.action_space.n)
        # The state generator returns an array; wrap its shape in a Box space
        # (bounds assumed to match the other environments in this repo).
        self.observation_space = gym.spaces.Box(
            low=-1,
            high=1,
            shape=self.sgen.generate(self.agent).shape,
            dtype=np.float32)

    def step(self, action):
        self.agent.do_action(action)
        # for _ in range(self.frame_skip):  # TODO: frame-skip would distort the reward
        self.env.update()
        self.env.render()

        state1 = self.sgen.generate(self.agent)
        reward, terminal = Reward0(self.agent)

        # gym expects info to always be a dict; only populate it on terminal steps.
        info = dict(deliveries=self.agent.total_deliveries,
                    pickups=self.agent.total_pickups) if terminal else {}

        return state1, reward, terminal, info

    def reset(self):
        self.env.reset()
        return self.sgen.generate(self.agent)

    def render(self, mode='human', close=False):
        return self.sgen.generate(self.agent)
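# Usage sketch (not from the source): a random-action rollout against
# DeepLogisticsNormal through the standard gym API implemented above.
if __name__ == "__main__":
    env = DeepLogisticsNormal()
    obs = env.reset()
    done = False
    total_reward = 0.0
    info = {}
    while not done:
        action = env.action_space.sample()  # random placeholder policy
        obs, reward, done, info = env.step(action)
        total_reward += reward
    print("Episode done. Return: %s, info: %s" % (total_reward, info))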
import os

import numpy as np
from gym.spaces import Box, Discrete, Tuple
from ray.rllib.env.multi_agent_env import MultiAgentEnv

from deep_logistics.environment import Environment
from deep_logistics.agent import Agent
from deep_logistics import spawn_strategy  # assumed module path


class BaseDeepLogisticsMultiEnv(MultiAgentEnv):

    def __init__(self, state, reward, width, height, depth, taxi_n,
                 group_type="individual", graphics_render=False,
                 delivery_locations=None):
        os.environ["MKL_NUM_THREADS"] = "1"
        self.env = Environment(
            width=width,
            height=height,
            depth=depth,
            taxi_n=taxi_n,
            taxi_agent=Agent,
            ups=None,
            graphics_render=graphics_render,
            delivery_locations=delivery_locations,
            spawn_strategy=spawn_strategy.RandomSpawnStrategy)
        self.state_representation = state(self.env)
        self.reward_function = reward
        self._render = graphics_render

        self.observation_space = Box(low=-1,
                                     high=1,
                                     shape=self.state_representation.generate(
                                         self.env.agents[0]).shape,
                                     dtype=np.float32)
        self.action_space = Discrete(self.env.action_space.N_ACTIONS)
        self.agents = {
            "agent_%s" % i: self.env.agents[i]
            for i in range(taxi_n)
        }
        self.total_steps = 0

        # Set up grouping for the environments.
        if group_type == "individual":
            self.grouping = {
                "group_%s" % x: ["agent_%s" % x]
                for x in range(taxi_n)
            }
        elif group_type == "grouped":
            self.grouping = {
                'group_1': ["agent_%s" % x for x in range(taxi_n)]
            }
        else:
            raise NotImplementedError("The group type %s is not implemented." %
                                      group_type)

        # NOTE: with_agent_groups returns a wrapped env; callers that want the
        # grouped view should use its return value.
        self.with_agent_groups(
            groups=self.grouping,
            obs_space=Tuple([self.observation_space for _ in range(taxi_n)]),
            act_space=Tuple([self.action_space for _ in range(taxi_n)]))

    def step(self, action_dict):
        self.total_steps += 1
        # TODO: this loop does not make sense when using multiple policies.
        # Currently a single policy issues one action per taxi.
        # Cluster: https://ray.readthedocs.io/en/latest/install-on-docker.html#launch-ray-in-docker
        info_dict = {}
        reward_dict = {}
        terminal_dict = {"__all__": False}
        state_dict = {}

        # Perform actions in the environment.
        for agent_name, action in action_dict.items():
            self.agents[agent_name].do_action(action=action)

        # Update the environment.
        self.env.update()
        if self._render:
            self.env.render()

        # Evaluate the score of each agent.
        any_terminal = False
        for agent_name in action_dict:
            reward, terminal = self.reward_function(self.agents[agent_name])
            reward_dict[agent_name] = reward
            terminal_dict[agent_name] = terminal
            if terminal:
                any_terminal = True
            state_dict[agent_name] = self.state_representation.generate(
                self.agents[agent_name])

        # The episode ends for everyone as soon as one agent is terminal.
        terminal_dict["__all__"] = any_terminal

        return state_dict, reward_dict, terminal_dict, info_dict

    def reset(self):
        self.env.reset()
        self.total_steps = 0
        return {
            agent_name: self.state_representation.generate(agent)
            for agent_name, agent in self.agents.items()
        }

    @staticmethod
    def on_episode_end(info):
        episode = info["episode"]
        env = info["env"].envs[0]
        deliveries = 0
        pickups = 0
        for agent in env.env.agents:
            deliveries += agent.total_deliveries
            pickups += agent.total_pickups
        # Report the per-agent average of both metrics.
        deliveries = deliveries / len(env.env.agents)
        pickups = pickups / len(env.env.agents)
        episode.custom_metrics["deliveries"] = deliveries
        episode.custom_metrics["pickups"] = pickups
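# Usage sketch (not from the source): stepping the multi-agent environment
# with one random action per agent. `State0` and `Reward0` are stand-ins for
# the state and reward callables used elsewhere in this repo, and the RLlib
# registration shown at the end assumes the function-dict callback API of
# older Ray versions.
if __name__ == "__main__":
    env = BaseDeepLogisticsMultiEnv(state=State0,
                                    reward=Reward0,
                                    width=5,
                                    height=5,
                                    depth=3,
                                    taxi_n=2,
                                    group_type="grouped")
    states = env.reset()
    actions = {name: env.action_space.sample() for name in states}
    states, rewards, terminals, infos = env.step(actions)

    # config = {"callbacks": {"on_episode_end": BaseDeepLogisticsMultiEnv.on_episode_end}}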
import time

import matplotlib.pyplot as plt

from deep_logistics.environment import Environment
from deep_logistics.agent import Agent, AIAgent  # assumed import path


class Env:

    def __init__(self, state_representation, fps=60, ups=None):
        self.env = Environment(
            height=10,
            width=10,
            depth=3,
            agents=1,
            agent_class=AIAgent,
            draw_screen=True,
            tile_height=32,
            tile_width=32,
            # scheduler=RandomScheduler,
            ups=ups,
            ticks_per_second=1,
            spawn_interval=1,  # In seconds
            task_generate_interval=1,  # In seconds
            task_assign_interval=1,  # In seconds
            delivery_points=[(7, 2), (2, 2), (2, 7), (7, 7)])
        self.state_representation = state_representation(self.env)

        # Assumes that all agents have spawned already and that all tasks are assigned.
        self.env.deploy_agents()
        self.env.task_assignment()

        self.last_time = time.time()
        self.pickup_count = 0
        self.delivery_count = 0
        self.stat_deliveries = []
        self.episode = 0

        # env.daemon = True
        # env.start()

        self.player = self.env.agents[0]

    def step(self, action):
        prev_state = self.player.state
        self.player.do_action(action=action)
        self.env.update()
        new_state = self.player.state
        # print("%s => %s" % (prev_state, new_state))

        # Fast-forward the game until the player is respawned.
        while self.player.state == Agent.INACTIVE:
            self.env.update()

        state = self.state_representation.generate(self.env.agents[0])

        if self.player.state in [Agent.IDLE, Agent.MOVING]:
            reward = -0.01
            terminal = False
        elif self.player.state in [Agent.PICKUP]:
            self.pickup_count += 1
            reward = 1
            terminal = False
            # print("Pickup", state, self.player.task.c_1)
        elif self.player.state in [Agent.DELIVERY]:
            self.delivery_count += 1
            reward = 10
            terminal = False
            # print("Delivery", state)
        elif self.player.state in [Agent.DESTROYED]:
            reward = -1
            terminal = True
        else:
            raise NotImplementedError(
                "Should never happen; all agent states must be handled above.")

        return state, reward, terminal, {}

    def reset(self):
        print(
            "[%s] Environment was reset, took: %s seconds. Pickups: %s, Deliveries: %s"
            % (self.episode, time.time() - self.last_time, self.pickup_count,
               self.delivery_count))
        self.last_time = time.time()
        self.stat_deliveries.append(self.delivery_count)

        if self.episode % 50 == 0:
            self.graph()

        self.pickup_count = 0
        self.delivery_count = 0
        self.episode += 1
        self.env.reset()
        return self.state_representation.generate(self.env.agents[0])

    def render(self):
        self.env.render()
        return self.state_representation.generate(self.env.agents[0])

    def graph(self):
        plt.plot([x for x in range(len(self.stat_deliveries))],
                 self.stat_deliveries,
                 color='blue')
        plt.xlabel('Episode')
        plt.ylabel('Number of Successful Deliveries')
        plt.title('Deep Logistics - PPO - Experiment A')
        plt.savefig("./ppo-experiment.png")
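# Usage sketch (not from the source): driving Env above with a random policy.
# `State0` is a stand-in for any state-representation class in this repo, and
# the random action stands in for a trained policy.
if __name__ == "__main__":
    import random

    env = Env(state_representation=State0)
    n_actions = env.env.action_space.N_ACTIONS
    for _ in range(10):  # a handful of throwaway episodes
        terminal = False
        while not terminal:
            action = random.randrange(n_actions)
            state, reward, terminal, info = env.step(action)
        env.reset()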
sys.path.append("/home/per/GIT/code/deep_logistics") sys.path.append("/root") from deep_logistics.environment import Environment from deep_logistics.agent import InputAgent from state_representations import State0 if __name__ == "__main__": env = Environment( height=10, width=10, depth=3, agents=2, agent_class=InputAgent, draw_screen=True, tile_height=32, tile_width=32, #scheduler=RandomScheduler, ups=60, ticks_per_second=1, spawn_interval=1, # In steps task_generate_interval=1, # In steps task_assign_interval=1, # In steps delivery_points=[(7, 2), (2, 2), (2, 7), (7, 7)], ) env.deploy_agents() env.task_assignment() state = State0(env) agent = env.agents[0] def on_event():
sys.path.append("/home/per/IdeaProjects/deep_logistics") sys.path.append("/home/per/GIT/code/deep_logistics") sys.path.append("/root") from deep_logistics.environment import Environment from deep_logistics.agent import InputAgent if __name__ == "__main__": env = Environment( height=5, width=5, depth=3, ups=None, ticks_per_second=1, taxi_n=1, taxi_agent=InputAgent, taxi_respawn=False, taxi_control="constant", scheduler=OnDemandScheduler, delivery_locations=None, spawn_strategy=LocationSpawnStrategy, graphics_render=True, graphics_tile_height=64, graphics_tile_width=64 ) env.deploy_agents() env.task_assignment() state = State0(env) agent = env.agents[0] def on_event():