agent.save_weights(args.model, overwrite=True)

# If TEST and TOKEN, submit to crowdAI
if not args.train and args.token:
    agent.load_weights(args.model)
    # Settings
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)

    # Run the simulation loop
    # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one.
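    # client.env_reset() returns a falsy observation once the grader has no simulations
    # left to run, which is what terminates the loop below.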
    while True:
        v = np.array(observation).reshape((env.observation_space.shape[0]))
        action = agent.forward(v)
        [observation, reward, done, info] = client.env_step(action.tolist())
        if done:
            observation = client.env_reset()
            if not observation:
                break

    client.submit()

# If TEST and no TOKEN, run some test experiments
if not args.train and not args.token:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 1 episode.
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=500)
Example #2
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_range=(-100., 100.))
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
#agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
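# keras-rl's DDPGAgent.compile also accepts a list of two optimizers: the first is used
# for the actor and the second for the critic, as in the call below.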
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1, nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.output, overwrite=True)

if not args.train:
    agent.load_weights(args.output)
    # Finally, evaluate our algorithm for 5 episodes.

    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("Target shoulder = %f, elbow = %f" % (env.shoulder,env.elbow)) 
            
            env.step(agent.forward(env.get_observation()))
Example #3
                  target_model_update=1e-3)
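# Compile with essentially stock Adam settings; in Keras 2.x, epsilon=None makes the
# optimizer fall back to the backend's default fuzz factor (K.epsilon()).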
agent.compile(optimizer=Adam(
    lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False))

if __name__ == '__main__':
    # Load previously trained weights; report and abort if the weights file is missing.
    try:
        agent.load_weights('OsmoEnv.h5f')
    except (IOError, OSError):
        print("not found weights-file")
        raise

    # Train
    # agent.fit(env, nb_steps=50000, verbose=1, nb_max_episode_steps=200)
    #
    # # Weights
    # agent.save_weights('OsmoEnv.h5f',overwrite=True)
    #
    # # Load
    # agent.load_weights('OsmoEnv.h5f')

    # Test
    # agent.test(env, visualize=False, nb_episodes=50, nb_max_episode_steps=200)

    #Play
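    # Play ten episodes with the trained actor: agent.forward maps each observation to an
    # action, every action is printed, and the episode's final info dict is printed by the
    # while-loop's else branch once the episode ends.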
    for _ in range(10):
        observation = env.reset()
        done = False
        while not done:
            action = agent.forward(observation)
            observation, reward, done, info = env.step(action)
            print(action)
        else:
            print(info)

rospy.init_node('pub_drive', anonymous=True)
rospy.Subscriber("/observe", Float32MultiArray, callback_observe)
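# callback_observe presumably stores the latest /observe message in a global (observe_data)
# that the control loop below reads; velocity commands are published on /cmd_vel as Twist messages.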

pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
r = rospy.Rate(10)  # 10 Hz

fig, ax = plt.subplots(1, 1)
ax.set_ylim(math.radians(-50), math.radians(50))
x = []
y = []
step_count = 0

while not rospy.is_shutdown():
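    # Query the trained agent, clamp each action component to [-0.8, 0.8], and halve the
    # linear velocity before publishing the command.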
    predict_action = agent.forward(observe_data)
    pub_vel = Twist()
    if predict_action[0] > 0.8:
        pub_vel.linear.x = 0.8
    elif predict_action[0] < -0.8:
        pub_vel.linear.x = -0.8
    else:
        pub_vel.linear.x = predict_action[0]
    if predict_action[1] > 0.8:
        pub_vel.angular.z = 0.8
    elif predict_action[1] < -0.8:
        pub_vel.angular.z = -0.8
    else:
        pub_vel.angular.z = predict_action[1]

    pub_vel.linear.x = pub_vel.linear.x*0.5
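    # The original snippet is cut off at this point; publishing the command and sleeping to
    # hold the loop rate are the usual next steps:
    pub.publish(pub_vel)
    r.sleep()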
Example #6
    agent.save_weights(args.model, overwrite=True)

# If TEST and TOKEN, submit to crowdAI
if not args.train and args.token:
    agent.load_weights(args.model)
    # Settings
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)

    # Run the simulation loop
    # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one.
    while True:
        v = np.array(observation).reshape((env.observation_space.shape[0]))
        action = agent.forward(v)
        [observation, reward, done, info] = client.env_step(action.tolist())
        if done:
            observation = client.env_reset()
            if not observation:
                break

    client.submit()

# If TEST and no TOKEN, run some test experiments
if not args.train and not args.token:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 1 episode.
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=500)
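    # The fragment below is the tail of a training call (truncated in this snippet); its
    # return value, keras_history, is written out and plotted further down.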
                              nb_max_episode_steps=1000)

    print('Saving model ' + args.model)
    agent.save_weights(args.model, overwrite=True)
    print('Saved model ' + args.model)

    with open(args.model + '_history', 'w') as f:
        f.write(str(keras_history.history))
    summarize.plot_diagrams(keras_history.history, args.model)

if args.test:
    agent.load_weights(args.model)
    agent.test(env, nb_episodes=1, nb_max_episode_steps=env.timestep_limit)

if args.submit:
    agent.load_weights(args.model)
    remote_base = 'http://grader.crowdai.org:1729'
    token = '688545d8ba985c174b4f967b40924a43'
    client = Client(remote_base)
    observation = client.env_create(token)
    # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
    while True:
        [observation, reward, done,
         info] = client.env_step(agent.forward(observation).tolist())
        print(observation)
        if done:
            observation = client.env_reset()
            if not observation:
                break
    client.submit()
# Ctrl + C.
if args.train:
    agent.fit(env,
              nb_steps=nallsteps,
              visualize=True,
              verbose=1,
              nb_max_episode_steps=env.timestep_limit,
              log_interval=10000,
              prefix=prefix)
    # After training is done, we save the final weights.
    agent.save_weights("%s.h5f" % args.output, overwrite=True)

if not args.train:
    agent.load_weights("%s.h5f" % args.output)
    # Finally, evaluate our algorithm for 5 episodes.
    if args.env != "Arm":
        agent.test(env,
                   nb_episodes=5,
                   visualize=True,
                   nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("\n\nTarget shoulder = %f, elbow = %f" %
                      (env.shoulder, env.elbow))

            obs = env.get_observation()
            print("Actual shoulder = %f, elbow = %f\r" % (obs[2], obs[3]), end="")
            env.step(agent.forward(obs))
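# Evaluation/plotting snippet: switch the (custom) environment into evaluation mode and set
# up its plot grid before probing the learned critic and actor below.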
env.is_train = False
env.plot_row = 1
env.plot_col = 5

q_values = pd.DataFrame()
st = status.reshape([-1, 1])
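# For every candidate action, evaluate the target critic on all recorded states ('status'
# and 'actions' are assumed to come from earlier in the script) and collect the Q-values
# column by column; the table is then written to critic.csv.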
for action in actions:
    state1_batch_with_action = [
        st, np.ones(st.shape).reshape(-1, 1, 1) * action
    ]
    q_values = pd.concat([
        q_values,
        pd.DataFrame(
            agent.target_critic.predict_on_batch(state1_batch_with_action))
    ],
                         axis=1)
q_values.to_csv('critic.csv')

with open('actor.json', 'w') as fw:
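    # Dump the deterministic policy: the action agent.forward returns for each recorded
    # observation, saved as JSON for later inspection or plotting.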
    observation = status.tolist()
    action = [float(agent.forward(np.array([obs]))[0]) for obs in observation]
    json.dump({'observation': observation, 'action': action}, fw)

agent.test(env,
           nb_episodes=5,
           visualize=True,
           verbose=1,
           nb_max_episode_steps=20)

env.plt.ioff()
env.plt.show()
Example #10
#                            gamma=.99, target_model_update=0.1)
#agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Checkpoint/log prefix: the user-supplied output name, or one built from the environment
# name and the sigma/theta arguments.
prefix = args.output if args.output else "%s_s%f_t%f" % (args.env, float(args.sigma), float(args.theta))

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1, nb_max_episode_steps=env.timestep_limit, log_interval=10000, prefix=prefix)
    # After training is done, we save the final weights.
    agent.save_weights("%s.h5f" % args.output, overwrite=True)

if not args.train:
    agent.load_weights("%s.h5f" % args.output)
    # Finally, evaluate our algorithm for 5 episodes.
    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("\n\nTarget shoulder = %f, elbow = %f" % (env.shoulder,env.elbow)) 

            obs = env.get_observation()
            print("Actual shoulder = %f, elbow = %f\r" % (obs[2], obs[3]), end="")
            env.step(agent.forward(obs))