Example #1
import time
from collections import deque

import numpy as np

# dqn, maze_grid and QAgent are project modules; they are assumed to be
# importable from the repository these snippets are taken from.


def main(start, ends, goal, n_height, n_width, obstacles, weight_file):
    # env
    env = maze_grid(start, ends, n_height, n_width, obstacles)  # maze
    env.render()
    # input('123')
    # model
    input_dim = env.observation_space.n
    hidden_dim = 256
    output_dim = env.action_space.n
    model = dqn.dqn_model(input_dim, hidden_dim, output_dim)
    model.load_weights(weight_file)

    print('Goal', goal)
    # show the demo
    states = list()
    env.reset(states, show=goal)
    s0 = np.reshape(states, [1, input_dim])
    total_reward = 0
    is_done = False
    steps = 0
    while not is_done:
        a0 = int(np.argmax(model.predict(s0)[0]))
        s1, r1, is_done, info = env.step(a0)
        env.render()
        time.sleep(0.3)
        s1 = np.reshape(s1, [1, input_dim])
        s0 = s1
        total_reward += r1
        steps += 1
    print('The total reward is %d; the agent path length is %d steps.' %
          (total_reward, steps))
    time.sleep(1)
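
A note on the model builder: these snippets call dqn.dqn_model(input_dim, hidden_dim, output_dim) without showing the dqn module itself. Below is a minimal sketch of what such a builder could look like, assuming a plain Keras MLP; the layer sizes, optimizer and loss are assumptions, not the project's actual code.

# Hypothetical sketch of dqn.dqn_model for the flat-state case above.
from tensorflow.keras import layers, models, optimizers

def dqn_model(input_dim, hidden_dim, output_dim):
    """Fully connected Q-network: state vector in, one Q-value per action out."""
    model = models.Sequential([
        layers.Dense(hidden_dim, activation='relu', input_shape=(input_dim,)),
        layers.Dense(hidden_dim, activation='relu'),
        layers.Dense(output_dim, activation='linear'),  # Q(s, a) per action
    ])
    model.compile(optimizer=optimizers.Adam(learning_rate=1e-3), loss='mse')
    return model
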
def train_agent(start, ends, n_height, n_width, obstacles, model_file=None):
    # build the environment
    env_1 = maze_grid(start, ends, n_height, n_width, obstacles[0])  # maze
    env_2 = maze_grid(start, ends, n_height, n_width, obstacles[1])
    # input('123')
    # q learning model
    agent = QAgent(
        envs=[env_1, env_2],
        memory_capacity=100 * env_1.maze_size,  # experience memory
        hidden_dim=256,
        model_file=model_file)
    print("Learning...")
    agent.learning(max_episodes=500,
                   batch_size=512,
                   gamma=0.9,
                   min_epsilon=0.01)
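
QAgent.learning itself is not reproduced in these examples. As a rough illustration of the replay step a DQN agent typically performs inside such a loop, here is a hedged sketch; the function name, the memory layout (state, action, reward, next_state, done) and all hyperparameters are assumptions, not the project's API.

# Hypothetical DQN experience-replay update, for illustration only.
import random
import numpy as np

def replay_update(model, memory, batch_size, gamma):
    """Sample a minibatch from the replay memory and fit the Q-network once."""
    batch = random.sample(memory, batch_size)
    states = np.vstack([b[0] for b in batch])
    next_states = np.vstack([b[3] for b in batch])
    q_now = model.predict(states, verbose=0)
    q_next = model.predict(next_states, verbose=0)
    for i, (_, action, reward, _, done) in enumerate(batch):
        target = reward if done else reward + gamma * np.max(q_next[i])
        q_now[i][action] = target  # only the taken action's target is updated
    model.fit(states, q_now, epochs=1, verbose=0)
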
def main(starts, end, n_height, n_width, obstacles, weight_file):
    # --- env
    start = starts[0]  # choose the start
    env = maze_grid(starts, end, n_height, n_width, obstacles)  # maze
    env.render()
    # input('123')
    # --- model
    model = dqn.dqn_model((3, 3, 1), (5, 5, 3))
    # load the weight
    model.load_weights(weight_file)
    # --- initial agent states
    agent_init_states = np.ones((5, 5, 1))
    agent_3_states = deque(maxlen=3)
    # keep the 3 most recent (consecutive) states
    for _ in range(3):
        agent_3_states.append(agent_init_states)
    goal_states, agent_states = env.reset(start,
                                          test=True)  # get the initial states
    # --- reshape the initial observations
    goal_states = np.reshape(goal_states, [3, 3, 1])
    goal_states = goal_states[np.newaxis, :]
    agent_states = np.reshape(agent_states, [5, 5, 1])
    agent_3_states.append(agent_states)
    # stack the 3 frames of shape (5, 5, 1) into a (1, 5, 5, 3) tensor
    agent_s0 = np.concatenate(
        (agent_3_states[0], agent_3_states[1], agent_3_states[2]), axis=2)
    agent_s0 = agent_s0[np.newaxis, :]

    # --- show the demo
    total_reward = 0
    is_done = False
    steps = 0
    while True:
        s0 = [goal_states, agent_s0]
        a0 = int(np.argmax(model.predict(s0)[0]))
        s1, r1, is_done, info = env.step(a0)
        env.render()
        steps += 1
        total_reward += r1
        if is_done:
            print('Arrived at the goal ...')
            break
        env.render()
        time.sleep(0.3)
        # update states
        s1 = np.reshape(s1, [5, 5, 1])
        agent_3_states.append(s1)
        # stack the 3 frames of shape (5, 5, 1) into a (1, 5, 5, 3) tensor
        agent_s1 = np.concatenate(
            (agent_3_states[0], agent_3_states[1], agent_3_states[2]), axis=2)
        agent_s1 = agent_s1[np.newaxis, :]
        agent_s0 = agent_s1

    print('The total reward is %d; the agent path length is %d steps.' %
          (total_reward, steps))
    time.sleep(1)
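
In this second main the model is built with two input shapes, a 3x3 goal observation and a stack of three 5x5 agent observations, and it is fed a list [goal_states, agent_s0]. A hedged sketch of a two-branch Keras network that would accept exactly those inputs follows; the branch sizes and the assumed four actions are illustrative, not the project's architecture.

# Hypothetical two-input Q-network matching dqn_model((3, 3, 1), (5, 5, 3)).
from tensorflow.keras import layers, models, optimizers

def dqn_model(goal_shape, agent_shape, n_actions=4):
    goal_in = layers.Input(shape=goal_shape)    # 3x3 local goal observation
    agent_in = layers.Input(shape=agent_shape)  # three stacked 5x5 agent views
    g = layers.Flatten()(goal_in)
    a = layers.Conv2D(32, 3, padding='same', activation='relu')(agent_in)
    a = layers.Flatten()(a)
    x = layers.Concatenate()([g, a])
    x = layers.Dense(256, activation='relu')(x)
    q_values = layers.Dense(n_actions, activation='linear')(x)
    model = models.Model(inputs=[goal_in, agent_in], outputs=q_values)
    model.compile(optimizer=optimizers.Adam(learning_rate=1e-3), loss='mse')
    return model
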
Example #4
def train_agent(start, ends, n_height, n_width, obstacles, model_file=None):
    # build the environment
    env = maze_grid(start, ends, n_height, n_width, obstacles)  # maze
    env.render()
    # q learning model
    agent = QAgent(env,
                   memory_capacity=100 * env.maze_size,  # experience memory
                   hidden_dim=256,
                   model_file=model_file)
    print("Learning...")
    agent.learning(max_episodes=3000,
                   batch_size=512,
                   gamma=0.9,
                   min_epsilon=0.01)
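
A possible way to call this train_agent, reusing the "instance difficult" layout from Example #8 below and assuming a 10x10 grid (both are assumptions, not part of this snippet):

# Illustrative call only; grid size and layout are borrowed from Example #8.
start = (5, 9)
ends = [(4, 5)]
obstacles = [(2, 7), (3, 7), (4, 7), (5, 7), (6, 7), (7, 7), (3, 5), (6, 5),
             (2, 3), (3, 3), (6, 3), (7, 3)]
train_agent(start, ends, n_height=10, n_width=10, obstacles=obstacles)
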
Example #5
def train_agent(start, ends, obstacles, load_model=False):

    env = maze_grid(start, ends, obstacles)  # maze
    env.render()
    agent = QAgent(
        env,
        memory_capacity=100 * env.maze_size,  # experience memory
        hidden_dim=100,
        model_file=load_model)
    env.reset()
    print("Learning...")
    agent.learning(max_episodes=300,
                   batch_size=512,
                   gamma=0.9,
                   min_epsilon=0.01)
def main(start, ends, n_height, n_width, obstacles):
    # env
    env = maze_grid(start, ends, n_height, n_width, obstacles)  # maze
    env.render()
    # model
    input_dim = env.observation_space.n
    hidden_dim = 256
    output_dim = env.action_space.n
    model = dqn.dqn_model(input_dim, hidden_dim, output_dim)
    # load the weight
    weight_file = 'single_agent_2_goals.h5'
    model.load_weights(weight_file)
    reward_record = dict()
    for i, goal in enumerate(ends):
        each_goal_record = list()
        print('Goal', goal)
        # show the demo
        for _ in range(10):
            states = list()
            env.reset(states, show=goal)
            env.render()
            s0 = np.reshape(states, [1, input_dim])
            total_reward = 0
            is_done = False
            steps = 0
            while not is_done:
                a0 = int(np.argmax(model.predict(s0)[0]))
                s1, r1, is_done, info = env.step(a0)
                env.render()
                time.sleep(0.1)
                s1 = np.reshape(s1, [1, input_dim])
                s0 = s1
                total_reward += r1
                steps += 1
            print('The total reward is %d; the agent path length is %d steps.' %
                  (total_reward, steps))
            each_goal_record.append(total_reward)
            time.sleep(0.5)
        print('Mean reward over 10 runs: %f, rewards: %s' %
              (np.mean(each_goal_record), each_goal_record))
        reward_record[i + 1] = each_goal_record

    print(reward_record)
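
Since reward_record maps each goal index to the ten rewards collected for it, a short summary block that could be added at the end of this main might look like the following (illustrative only):

    # Illustrative: per-goal mean reward computed from reward_record.
    for goal_idx, rewards in reward_record.items():
        print('Goal %d: mean reward over %d runs = %.2f'
              % (goal_idx, len(rewards), np.mean(rewards)))
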
Example #7
def train_agent(starts, end, n_height, n_width, obstacles, model_file=None):
    """
    build the environment
    @param starts: list
    @param end: tuple
    @param n_height: int
    @param n_width: int
    @param obstacles: list
    @param model_file: str model weights

    """
    env = maze_grid(starts, end, n_height, n_width, obstacles)  # maze
    env.render()
    # input('show')
    # q learning model
    agents = QAgent(
        env,
        memory_capacity=50 * env.maze_size,  # experience memory
        model_file=model_file)
    logging.info("Learning...")
    agents.learning(max_episodes=1000,
                    batch_size=512,
                    gamma=0.95,
                    min_epsilon=0.01)
Example #8
# choose one of the instances below
# start = (2, 9)
# ends = [(5, 4)]  # many ends
# instance 1
# obstacles = [(4, 2), (4, 3), (4, 4), (4, 5), (4, 6), (1, 7), (2, 7), (3, 7), (4, 7), (6, 7), (7, 7), (8, 7)]  # 1

# instance 2
# obstacles = [(4, 2), (4, 3), (4, 4), (4, 5), (4, 6), (1, 7), (2, 7), (3, 7), (4, 7), (5, 7), (6, 7), (7, 7), (8, 7)] #2

# instance difficult
start = (5, 9)
ends = [(4, 5)]
obstacles = [(2, 7), (3, 7), (4, 7), (5, 7), (6, 7), (7, 7), (3, 5), (6, 5),
             (2, 3), (3, 3), (6, 3), (7, 3)]

env = maze_grid(start, ends, obstacles)
model = load_model('./model3/dqn_maze_300.h5')

score_list = list()
episode = 10

# run 10 times
for i in range(episode):
    state = env.reset()
    score = 0
    while True:
        env.render()
        time.sleep(0.05)
        action = np.argmax(model.predict(np.array([state]))[0])
        state, reward, done, _ = env.step(action)
        score += reward