import csv
import sys

import numpy as np

# Task and DDPG are the project's own classes; the import paths below are
# assumptions based on the usual layout of this project (task.py and agents/agent.py).
from task import Task
from agents.agent import DDPG


def train():
    runtime = 5.                                      # time limit of the episode
    init_pose = np.array([0., 0., 4.0, 0., 0., 0.0])  # initial pose
    init_velocities = np.array([0., 0., 0.0])         # initial velocities
    init_angle_velocities = np.array([0., 0., 0.])    # initial angle velocities
    file_output = 'rewards.txt'                       # file name for saved results
    num_episodes = 10
    target_pos = np.array([0., 0., 40.])

    task = Task(init_pose=init_pose,
                init_velocities=init_velocities,
                init_angle_velocities=init_angle_velocities,
                target_pos=target_pos)
    agent = DDPG(task)

    labels = ['episode', 'avg_reward', 'total_reward']
    results = {x: [] for x in labels}

    with open(file_output, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(labels)

        best_total_reward = -np.inf
        for i_episode in range(1, num_episodes + 1):
            state = agent.reset_episode()  # start a new episode
            total_reward = 0
            rewards = []
            while True:
                # select an action according to the learned policy plus exploration noise
                action = agent.act(state)
                # execute the action and observe the reward and the next state
                next_state, reward, done = task.step(action)
                # store the experience, sample a mini-batch, and learn
                agent.step(action, reward, next_state, done)

                # data tracking
                total_reward += reward
                rewards.append(reward)

                state = next_state
                if done:
                    best_total_reward = max(best_total_reward, total_reward)
                    avg_reward = np.mean(np.array(rewards))
                    print(task.sim.pose)

                    # log per-episode statistics to the results dict and the CSV file
                    to_write = [i_episode, avg_reward, total_reward]
                    for ii in range(len(labels)):
                        results[labels[ii]].append(to_write[ii])
                    writer.writerow(to_write)

                    print("\rEpisode = {:4d}, total_reward = {:7.3f}, avg_reward = {:7.3f} (best = {:7.3f})"
                          .format(i_episode, total_reward, avg_reward, best_total_reward),
                          end="")  # [debug]
                    break
            sys.stdout.flush()
    return agent
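# For a quick look at learning progress, the per-episode statistics written to
# rewards.txt by train() can be plotted. A minimal sketch, assuming the CSV layout
# produced above (header row: episode, avg_reward, total_reward); pandas and
# matplotlib are extra dependencies not used elsewhere in this project,
# and plot_rewards is a hypothetical helper name.
import matplotlib.pyplot as plt
import pandas as pd


def plot_rewards(file_output='rewards.txt'):
    # one row per episode: episode index, average per-step reward, total reward
    data = pd.read_csv(file_output)
    plt.plot(data['episode'], data['total_reward'], label='total reward')
    plt.plot(data['episode'], data['avg_reward'], label='average reward per step')
    plt.xlabel('episode')
    plt.ylabel('reward')
    plt.legend()
    plt.show()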
num_episodes = 1000
init_pose = np.array([0., 0., 0., 0., 0., 0.])
target_pos = np.array([0., 0., 10.])
init_velocities = np.array([0., 0., 0.])        # initial velocities
init_angle_velocities = np.array([0., 0., 0.])  # initial angular velocities

task = Task(init_pose=init_pose,
            target_pos=target_pos,
            init_angle_velocities=init_angle_velocities,
            init_velocities=init_velocities)

best_score = -np.inf
agent = DDPG(task)

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # start a new episode
    score = 0
    while True:
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(action, reward, next_state, done)
        state = next_state
        score += reward
        best_score = max(best_score, score)
        if done:
            print("\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f})".format(
                i_episode, score, best_score), end="")  # [debug]
            break
    sys.stdout.flush()
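# To inspect what the trained agent actually does, a single episode can be rolled
# out while recording the simulator state. A minimal sketch, assuming the same
# Task/DDPG interface used above; run_episode is a hypothetical helper name, and
# agent.act() may still add exploration noise, so the recorded trajectory is not
# strictly the greedy policy.
def run_episode(agent, task):
    positions, rewards = [], []
    state = agent.reset_episode()
    while True:
        action = agent.act(state)
        state, reward, done = task.step(action)
        positions.append(list(task.sim.pose[:3]))  # x, y, z of the quadcopter
        rewards.append(reward)
        if done:
            break
    return np.array(positions), np.array(rewards)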