# Evaluation entry point: run a trained agent for one episode and report the
# accumulated reward. `Agent` and `env` are assumed to be defined elsewhere.
def main():
    agent = Agent()
    agent.load()  # restore weights from checkpoint
    total_reward = 0
    obs = env.reset()
    env.render()
    for _ in range(10000):  # cap on episode length
        act = agent.predict(obs)
        obs, reward, done, _ = env.step(act)
        total_reward += reward
        env.render()
        if done:
            print(f'total_reward: {total_reward}')
            env.close()
            break
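
# The snippet above leaves `env` undefined. A minimal sketch of the assumed
# setup, using the classic gym API where step() returns (obs, reward, done,
# info); the env id 'CartPole-v1' is an illustrative assumption, not taken
# from the source.
import gym

env = gym.make('CartPole-v1')

if __name__ == '__main__':
    main()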
# Per-agent observation and action buffers (one slot per agent).
agent_obs = [None] * flags.num_agents
agent_obs_buffer = [None] * flags.num_agents
agent_action_buffer = [2] * flags.num_agents
max_steps = flags.episode_length
start_time = time.time()

# Load an RL agent and initialize it from checkpoint if necessary.
# Independent DQN/PPO: each agent receives its own observation,
# but all agents share a single model.
if flags.agent_type == "dqn":
    agent = DQN_Agent(state_size, action_size, flags.num_agents)
elif flags.agent_type == "ppo":
    agent = PPO_Agent(state_size, action_size, flags.num_agents)

if flags.load_model:
    start, eps = agent.load(project_root / 'checkpoints', 0, 1.0)
else:
    start, eps = 0, 1
if not flags.train:
    eps = 0.0  # act greedily during evaluation

# Mapping from discrete action ids to their meanings.
ACTIONS = {0: "up", 1: "right", 2: "down", 3: "left", 4: "stop"}

def obs_wrapper(obss):
    '''utility: list of observations -> numpy array'''
    return np.array(obss)
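
# For context, a hedged sketch of how `eps` typically drives action selection
# in an independent-DQN loop; `agent.act` and its signature are assumptions
# here, not confirmed by the source.
import random

def select_action(agent, obs, eps):
    """Epsilon-greedy: explore with probability eps, otherwise exploit."""
    if random.random() < eps:
        return random.choice(list(ACTIONS.keys()))  # random exploratory action
    return agent.act(obs)  # greedy action from the shared model (assumed API)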
crop_start = (15, 30)
crop_end = (200, 125)
starting_epsilon = 0.05 if LOAD_MODEL else 1.0

env = gym.make('SpaceInvaders-v0')
brain = Agent(gamma=0.95, epsilon=starting_epsilon, lr=0.003,
              input_dims=input_dims, batch_size=batch_size,
              n_actions=n_actions, max_mem_size=5000, save_path='models/')

if LOAD_MODEL:
    brain.load()
else:
    # Fill the replay memory by playing random games before training starts.
    while brain.mem_cntr < brain.mem_size:
        observation = env.reset()
        observation = preprocess(observation, crop_start, crop_end)
        done = False
        while not done:
            # Actions: 0 no-op, 1 fire, 2 move right, 3 move left,
            # 4 move right + fire, 5 move left + fire
            action = env.action_space.sample()
            observation_, reward, done, info = env.step(action)
            observation_ = preprocess(observation_, crop_start, crop_end)
            if done and info['ale.lives'] == 0:
                reward = -100  # heavy penalty for losing the last life
            brain.store_transition(observation, action, reward,
                                   observation_, int(done))
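
# `preprocess` is used above but not defined in this snippet. A minimal sketch
# of what such a function commonly does for Atari frames (crop to the play
# area, convert to grayscale, scale to [0, 1]); the exact steps are an
# assumption, not taken from the source.
import numpy as np

def preprocess(frame, crop_start, crop_end):
    """Crop the raw RGB frame, convert to grayscale, normalize to [0, 1]."""
    (y0, x0), (y1, x1) = crop_start, crop_end
    cropped = frame[y0:y1, x0:x1]
    gray = cropped.mean(axis=2)  # average the RGB channels
    return gray.astype(np.float32) / 255.0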
# Rendering loop (fragment of a method: `self.agent` and the initial
# `states` come from the enclosing class).
env.render()
while True:
    action = self.agent.act(states)
    states, _, done, _ = env.step(action)
    env.render()
    if done:
        break
    time.sleep(0.01)  # slow playback so the rendering is watchable

seed = 3721
env = SingleLngEnv(
    n_loc=10,
    n_steps=1000,
    fuel_cost=0.1,
    price_sigma=0.1,
    price_daily_vol=0.02,
    price_theta=0.01,
    max_distance=30.0,
    normalize=True,
)

# from ddpg import Agent
from dqn import Agent

# State: 3 features per location plus 2 global features; one action per location.
agent = Agent(state_size=env.n_loc * 3 + 2, action_size=env.n_loc,
              random_seed=seed)
agent.load()
agent.train(env, 1000, 1000)

solver = DdpgLngSolver(env, agent, seed)
solver.solve()
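
# A hedged sketch of the same playback loop as a standalone helper, so it can
# run outside the class context implied by `self.agent` above; the name
# `watch` and its signature are illustrative assumptions.
import time

def watch(agent, env, delay=0.01):
    """Roll out the greedy policy once, rendering each step."""
    states = env.reset()
    env.render()
    done = False
    while not done:
        action = agent.act(states)
        states, _, done, _ = env.step(action)
        env.render()
        time.sleep(delay)
    env.close()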