Example #1
def run_eval(dir_name: str, episodes: int = 100, render: bool = False) -> List[int]:
    agent_conf = AgentConf()
    env = Tetris()
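    # build a DQN agent from the shared AgentConf hyperparameters; its model
    # is replaced below by the network loaded from disk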
    agent = DQNAgent(env.get_state_size(),
                     n_neurons=agent_conf.n_neurons, activations=agent_conf.activations,
                     epsilon_stop_episode=agent_conf.epsilon_stop_episode, mem_size=agent_conf.mem_size,
                     discount=agent_conf.discount, replay_start_size=agent_conf.replay_start_size)

    # timestamp_str = "20190730-165821"
    # log_dir = f'logs/tetris-nn={str(agent_conf.n_neurons)}-mem={agent_conf.mem_size}' \
    #     f'-bs={agent_conf.batch_size}-e={agent_conf.epochs}-{timestamp_str}'

    # tetris-20190731-221411-nn=[32, 32]-mem=25000-bs=512-e=1 good

    log_dir = 'logs/' + dir_name

    # load_model
    agent.model = load_model(f'{log_dir}/model.hdf')
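    # epsilon = 0 disables exploration, so the evaluation is fully greedy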
    agent.epsilon = 0
    scores = []
    for episode in range(episodes):
        env.reset()
        done = False

        while not done:
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # find the action that corresponds to the best state
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break
            _, done = env.hard_drop([best_action[0], 0], best_action[1], render=render)
        scores.append(env.score)
        # print results at the end of the episode
        print(f'episode {episode} => {env.score}')
    return scores
Example #2
    state_size = env.OBSERVATION_SPACE_VALUES
    action_size = env.ACTION_SPACE_SIZE
    agent = DQNAgent(state_size, action_size)
    done = False
    batch_size = 64

    best_score = -1
    render = False

    if PLAY:
        play_game(agent, False)
    else:
        if LOAD_MODEL:
            print("Loading Model...")
            agent.load(load_file_name)
            agent.epsilon = 1.0
            agent.learning_rate = 0.001
            agent.epsilon_decay = 0.990
            agent.gamma = 0.95
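        # each phase enlarges the environment (number_of_grids) and restarts
        # exploration and the replay memory from scratch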

        for phase in range(7, 8):
            env.number_of_grids = phase + 3
            agent.epsilon = 1.0
            agent.memory = deque(maxlen=2000)
            phase_scores = deque(maxlen=5)
            for e in range(EPISODES):
                done = False
                state = env.reset()
                # env.seed(0)
                state = np.reshape(state, [1, state_size])
                score = 0
Example #3
def dqn():
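    # training hyperparameters: exploration schedule, replay-memory size,
    # discount factor and the fully connected network layout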
    episodes = 10000
    max_steps = None
    epsilon_stop_episode = 7000
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 1000
    log_every = 20
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    env = Tetris()
    '''
    with open(r"saved_agents/pickled_new_agent_10000_7000", "rb") as input_file:
        agent = pickle.load(input_file)
        agent.epsilon = 0
    
    '''
    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons,
                     activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode,
                     mem_size=mem_size,
                     discount=discount,
                     replay_start_size=replay_start_size)
    agent.epsilon = 0
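    # epsilon is forced to 0, so the agent always plays greedily; with the
    # train() calls below commented out, this loop only evaluates the agent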
    '''
    hateris = DQNAgent(env.get_state_size()+1,
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)
    #env.hater = hateris
    '''

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    tot_max_score = 0
    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states(env.current_piece)
            best_state = agent.best_state(next_states.values())
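            # recover the action whose resulting board state the agent rated highest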

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0],
                                    best_action[1],
                                    render=render,
                                    render_delay=render_delay)
            #agent.add_to_memory(current_state, next_states[best_action], reward, done)
            #hateris.add_to_memory(current_state+[env.current_piece], next_states[best_action]+[env.current_piece], -reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        #if episode % train_every == 0:
        #agent.train(batch_size=batch_size, epochs=epochs)
        #hateris.train(batch_size=batch_size, epochs=epochs)

        # Logs
        #if log_every and episode and episode % log_every == 0 and episode>101:
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            print(f'{episode} {avg_score} {min_score} {max_score}')
            '''if (tot_max_score < max_score):
                agent.save("dqnAgentMax10000.h5", episode)
                tot_max_score = max_score'''

    #agent.save("dqnAgent10000.h5", episode)

    # with open("saved_agents/pickled_new_agent_10000_7000", "wb") as input_file:
    #     pickle.dump(agent, input_file)
    plt.plot(scores)
    plt.show()
Example #4
# (this snippet begins mid-way through a per-episode progress print)
                  ' || average-score %.2f' % avg_score,
                  'max-score %.2f' % max_score,
                  ' || epsilon %.2f' % agent_dqn.epsilon, 'num_steps',
                  num_steps)

            if score > max_score:
                max_score = score  # possible improvement: use a short moving average of max_score
                agent_dqn.save_network_checkpoints()

        print("Training done!")
        plot_metrics(steps_arr, score_history, eps_history, figure_file_name)

    # Capturing the agent playing
    if create_capture:
        img_arr = []
        agent_dqn.epsilon = 0.0001
        observation = env.reset()
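        # roll the near-greedy policy out for 1000 steps, converting each
        # observation into an 8-bit BGR frame for the video capture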

        for i in range(1000):
            frame = np.uint8(observation[0] * 255)
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
            img_arr.append(frame)

            action = agent_dqn.choose_action(observation)
            observation, _, _, _ = env.step(action)

        out = cv2.VideoWriter('captures/' + agent_dqn.env_name + '.avi',
                              cv2.VideoWriter_fourcc(*'DIVX'), 25, (84, 84))

        for i in range(len(img_arr)):
            out.write(img_arr[i])
        out.release()  # finalise the video file on disk
Example #5
    model.add(Dense(8, activation="relu", name="layer2"))
    model.add(Dense(action_size, activation="linear", name="layer3"))
    model.compile(loss='mse', optimizer=Adam(lr=learning_rate))

    model.summary()

    # model.add(Dense(8, input_dim=state_size, activation='relu'))
    # model.add(Dense(16, activation='relu'))
    # model.add(Dense(32, activation='relu'))
    # model.add(Dense(action_size, activation='linear'))
    # model.compile(loss='mse',
    #               optimizer=Adam(lr=learning_rate))

    agent = DQNAgent(state_size, action_size, model)

    agent.epsilon = 0.75

    done = False
    batch_size = 64
    EPISODES = 100
    counter = 0
    total_reward = 0

    # env.turn_off_display()

    for e in range(EPISODES):
        # if e == 2:
        #     env.turn_on_display()

        summary = []
Example #6
def main(model=None, mode='train', start_episode=0):
    my_xml = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
    <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
      <About>
        <Summary>Hill Descent.</Summary>
      </About>
      <ModSettings>
        <MsPerTick>20</MsPerTick>
      </ModSettings>
      <ServerSection>

        <ServerInitialConditions>

            <Time><StartTime>1</StartTime></Time>
        </ServerInitialConditions>
        <ServerHandlers>

          <DefaultWorldGenerator seed="-999595225643433963" forceReset="false" destroyAfterUse="false" />

          <ServerQuitFromTimeUp timeLimitMs="100000000"/>
          <ServerQuitWhenAnyAgentFinishes/>
        </ServerHandlers>
      </ServerSection>
      <AgentSection mode="Survival">
        <Name>Bob</Name>
        <AgentStart>
          <Placement x="28.5" y="87" z="330.5" pitch="-90" yaw="0"/>
        </AgentStart>
        <AgentHandlers>
          <DiscreteMovementCommands/>
          <MissionQuitCommands quitDescription="done"/>
          <ChatCommands/>
          <ObservationFromFullStats/>
          <ObservationFromGrid>
              <Grid name="sight">
                  <min x="{}" y="{}" z="{}"/>
                  <max x="{}" y="{}" z="{}"/>
              </Grid>
              <Grid name="feet">
                  <min x="0" y="-1" z="0"/>
                  <max x="0" y="-1" z="0"/>
              </Grid>
          </ObservationFromGrid>
          <AgentQuitFromTouchingBlockType>
              <Block type="cobblestone" />
          </AgentQuitFromTouchingBlockType>
        </AgentHandlers>
      </AgentSection>
    </Mission>

    '''.format(-(grid_width - 1) // 2, -grid_height, -(grid_width - 1) // 2,
               (grid_width - 1) // 2, grid_height, (grid_width - 1) // 2)
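    # the "sight" grid is centred on the agent: grid_width blocks across in x
    # and z, and from grid_height blocks below to grid_height blocks above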

    batch_size = 100
    agent = DQNAgent(state_size, action_size, learning_rate, discount_rate,
                     epsilon, epsilon_min, epsilon_decay)
    if model is not None:
        agent.load(model)
        if mode == 'test':
            agent.epsilon = 0.0
        print('loaded model: {}'.format(model))
    else:
        clear_csv('./data/results.csv')
        clear_csv('./data/moves.csv')

    my_client_pool = MalmoPython.ClientPool()
    my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))
    agent_host = MalmoPython.AgentHost()

    for e in range(start_episode + 1, episodes + 1):
        my_mission = MalmoPython.MissionSpec(my_xml, True)
        my_mission_record = MalmoPython.MissionRecordSpec()
        my_mission.requestVideo(800, 500)
        my_mission.setViewpoint(2)
        print("Waiting for the mission to start", end=' ')
        agent_host.startMission(
            my_mission,
            my_mission_record,
        )
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            print(".", end="")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            for error in world_state.errors:
                print("Error:", error.text)
        print()
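        # clear passive mobs (chickens, pigs, cows) before the episode starts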
        agent_host.sendCommand('chat /kill @e[type=Chicken]')
        agent_host.sendCommand('chat /kill @e[type=Pig]')
        agent_host.sendCommand('chat /kill @e[type=Cow]')
        moves = 0
        episode_reward = 0

        while world_state.is_mission_running:
            world_state = agent_host.getWorldState()
            if world_state.number_of_observations_since_last_state > 0:
                try:
                    obvsText = world_state.observations[-1].text
                    data = json.loads(obvsText)
                except:
                    print("Error when getting state")
                    continue

                state = get_state(data)

                prev_x = data.get(u'XPos', 0)
                prev_y = data.get(u'YPos', 0)
                prev_z = data.get(u'ZPos', 0)

                useful_state = [state[2], state[6], state[7], state[8],
                                state[10], state[11], state[13],
                                state[14], state[16], state[17],
                                state[18], state[22]]

                action = agent.act(useful_state)
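                # send the jump-move command when the adjacent grid cell in the
                # chosen direction reads 0, otherwise the plain movement command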

                if ((action == 0 and state[grid_center - grid_width] == 0)
                        or (action == 1 and state[grid_center + 1] == 0) or
                    (action == 2 and state[grid_center + grid_width] == 0)
                        or (action == 3 and state[grid_center - 1] == 0)):
                    agent_host.sendCommand(jump_directions[action])
                else:
                    agent_host.sendCommand(directions[action])
                time.sleep(0.25)
                #print("North:", state[grid_center - grid_width], \
                #      "  East:", state[grid_center + 1], \
                #      "  South:", state[grid_center + grid_width], \
                #      "  West:", state[grid_center - 1])

                try:
                    world_state = wait_world_state(agent_host, world_state)
                    obvsText = world_state.observations[-1].text
                    data = json.loads(obvsText)
                except:
                    print("Error when getting state")
                    continue

                current_x = data.get(u'XPos', 0)
                current_y = data.get(u'YPos', 0)
                current_z = data.get(u'ZPos', 0)
                damage_taken = calculate_damage(prev_y, current_y)
                next_state = get_state(data)

                # reduced feature vector taken from the *next* observation
                useful_next_state = [next_state[2], next_state[6], next_state[7], next_state[8],
                                     next_state[10], next_state[11], next_state[13],
                                     next_state[14], next_state[16], next_state[17],
                                     next_state[18], next_state[22]]

                # print("previous and current y", prev_y, current_y)
                # print("damage taken", damage_taken)
                #print("X:", prev_x, current_x, "\n", \
                #      "Y:", prev_y, current_y, "\n", \
                #      "Z:", prev_z, current_z, "\n")
                if prev_x != current_x or prev_y != current_y or prev_z != current_z:
                    reward = 2 * (prev_y - current_y) - 50 * damage_taken - 1
                else:
                    reward = -1000
                episode_reward += reward
                done = (current_y <= goal_height
                        or not world_state.is_mission_running
                        or data['Life'] <= 0)

                agent.remember(useful_state, action, reward, useful_next_state,
                               done)
                if ((action == 0 and state[grid_center - grid_width] == 0)
                        or (action == 1 and state[grid_center + 1] == 0) or
                    (action == 2 and state[grid_center + grid_width] == 0)
                        or (action == 3 and state[grid_center - 1] == 0)):
                    print(
                        'episode {}/{}, action: {}, reward: {}, e: {:.2}, move: {}, done: {}'
                        .format(e, episodes, jump_directions[action], reward,
                                agent.epsilon, moves, done))
                else:
                    print(
                        'episode {}/{}, action: {}, reward: {}, e: {:.2}, move: {}, done: {}'
                        .format(e, episodes, directions[action], reward,
                                agent.epsilon, moves, done))
                moves += 1

                if mode == 'train' or model is None:
                    write_to_csv('./data/moves.csv',
                                 [e, current_x, current_y, current_z, reward])
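                # experience replay: train on a minibatch from memory once the
                # episode index exceeds the batch size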

                if e > batch_size:
                    agent.replay(batch_size)

                if done or moves > max_moves:
                    agent_host.sendCommand("quit")

        if (mode == 'train' or model is None) and (
                e in checkpoints or agent.epsilon <= epsilon_min):
            print('saving model at episode {}'.format(e))
            agent.save('./models/model_{}'.format(e))
            if agent.epsilon <= epsilon_min:
                break

        time.sleep(1)
        # my_mission.forceWorldReset()
        if mode == 'train' or model is None:
            write_to_csv('./data/results.csv',
                         [e, episode_reward, moves,
                          int(episode_reward > 0)])