Example no. 1
def train(game):
    """Train a DQN agent on *game*, keeping only non-negative-reward episodes.

    Each episode's transitions are buffered locally; they are committed to the
    agent's replay memory (and a training step is run) only when the episode's
    total shaped reward is >= 0.
    """
    agent = DQN(game)

    for _ in tqdm(range(TRAIN_GAMES)):
        game.new_episode()
        prev_vars, prev_img = None, None
        episode_over = False
        transitions = []
        episode_reward = 0

        while not episode_over:
            state = game.get_state()
            img = state.screen_buffer
            variables = state.game_variables

            # First step of the episode: no previous frame/variables exist
            # yet, so fall back to the current ones.
            if prev_vars is None:
                prev_vars = variables
            if prev_img is None:
                prev_img = img

            action = agent.act(img)
            reward = game.make_action(action)
            episode_over = game.is_episode_finished()
            # Combine the engine reward with the shaped bonus, then scale down.
            reward = (reward + calculate_additional_reward(prev_vars, variables)) / 100
            episode_reward += reward
            transitions.append([prev_img, img, reward, action, episode_over])
            prev_vars, prev_img = variables, img

        # Only commit episodes that scored at least zero to replay memory.
        if episode_reward >= 0:
            for prev_state, cur_state, r, a, d in transitions:
                agent.remember(prev_state, cur_state, r, a, d)
            agent.train()
Example no. 2
def run(ep, train=False):
    """Run (and optionally train) a DQN agent on the Pong environment.

    Args:
        ep: Number of episodes to play.
        train: When True, the agent replays/learns every step and the weights
            are saved at the end; when False, previously saved weights are
            loaded and the agent only plays.

    Returns:
        A list with the total score obtained in each episode.
    """
    pygame.init()
    scores = []  # renamed from `loss`: it accumulates per-episode scores
    agent = DQN(3, 5)
    env = pongGame()
    weights_filepath = 'PongGame.h5'
    if not train:  # idiomatic truthiness instead of `train == False`
        agent.model.load_weights(weights_filepath)
        print("weights loaded")
    for e in range(ep):
        # Drain the event queue so the window stays responsive and closable.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        state = env.reset()
        state = np.reshape(state, (1, 5))
        score = 0
        max_steps = 1000
        for _ in range(max_steps):
            action = agent.act(state)
            reward, next_state, done = env.step(action)
            score += reward
            next_state = np.reshape(next_state, (1, 5))
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if train:
                agent.replay()
            if done:
                print("episode: {}/{}, score: {}".format(e, ep, score))
                break
        scores.append(score)
    if train:
        # Reuse the same path used for loading, instead of a duplicated literal.
        agent.model.save_weights(weights_filepath)
    return scores
Example no. 3
def play(game):
    """Run a pre-trained DQN agent for PLAY_GAMES episodes, printing each action."""
    agent = DQN(game, use_saved=True)
    for _ in tqdm(range(PLAY_GAMES)):
        game.new_episode()
        episode_over = False
        while not episode_over:
            frame = game.get_state().screen_buffer
            chosen_action = agent.act(frame)
            print(chosen_action)
            game.make_action(chosen_action)
            episode_over = game.is_episode_finished()
    N_SAVE = 500
    env = gym.make('FlappyBird-v0')
    agent = DQN(env)
    scores = deque(maxlen=100)
    for i in range(N_EP):
        score = 0
        ob = env.reset()

        # Stack observations
        pre_ob = preprocess(ob)
        pre_ob = pre_ob.reshape(1, 100, 100)
        ob_stack = np.stack((pre_ob, ) * 4, -1)
        pre_ob = ob_stack

        while True:
            action = agent.act(pre_ob, step=i)

            ob, reward, done, _ = env.step(action)
            if reward <= -1:
                reward = -1

            next_pre_ob = preprocess(ob)

            # Stack observations
            next_pre_ob = next_pre_ob.reshape(1, 100, 100)
            ob_stack = np.insert(ob_stack, -1, next_pre_ob, axis=3)
            ob_stack = np.delete(ob_stack, 0, axis=3)
            next_pre_ob = ob_stack

            agent.remember(pre_ob, action, reward, next_pre_ob, done)
            agent.replay()