tau=0.001,
             actor_lr=0.0001,
             critic_lr=0.001,
             use_layer_norm=True)
print('DDPG agent configured')
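# Warm start: restore the trained network weights and the persisted replay
# memory so training resumes from the checkpoint instead of from scratch.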
agent.load_model(agent.current_path + '/model/model.ckpt')
agent.load_memory()

max_episode = 10000
tot_rewards = []
print('env reset')
observation, done = env.reset()
action = agent.act(observation)
print(action)
rospy.sleep(0.8)
observation, reward, done = env.step(action)
rospy.sleep(0.8)
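# Bookkeeping: noise_sigma is halved every 100 episodes in the loop below;
# curr_highest_eps_reward and the cutoff/save counters track when the
# episode reward improves enough to be worth checkpointing.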
noise_sigma = 0.15
save_cutoff = 1
cutoff_count = 0
save_count = 0
curr_highest_eps_reward = -1000.0
for i in xrange(max_episode):
    if i % 100 == 0 and noise_sigma > 0.03:
        agent.noise = OUNoise(agent.nb_actions, sigma=noise_sigma)
        noise_sigma /= 2.0
    step_num = 0
    eps_reward = 0.0  # running reward for the current episode
    while not done:
        step_num += 1
        action = agent.step(observation, reward, done)
        observation, reward, done = env.step(action)
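        eps_reward += reward
    # Episode wrap-up: a sketch of logic missing from this excerpt. The
    # checkpointing behaviour is an assumption based on the counters
    # initialised above, and agent.save_model is a hypothetical hook.
    tot_rewards.append(eps_reward)
    if eps_reward > curr_highest_eps_reward:
        curr_highest_eps_reward = eps_reward
        cutoff_count += 1
    if cutoff_count >= save_cutoff:
        cutoff_count = 0
        save_count += 1
        agent.save_model(agent.current_path + '/model/model.ckpt')  # hypothetical
    observation, done = env.reset()
    reward = 0.0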
# jointboi.move(testpos)

# rospy.sleep(1)

# TODO fix reset function

# rospy.sleep(1)
# print(envboi.get_latest_state())
# envboi.reset()
# rospy.sleep(2)
# rospy.spin()

# position_command.pos_commander(testpos)
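# Manual-control loop: this assumes envboi is an ArmEnvironment-style
# object created earlier in the script and that numpy is imported as np.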
rewards = np.array([])
while True:
    current_distance = envboi.get_goal_distance()
    print("Current distance is: " + str(current_distance))
    action = raw_input("Enter an action from -1 to 1: \n")
    action = np.array([float(i) for i in action.split(',')])
    print(action)
    state, reward, done = envboi.step(action)
    print("Reward is {}, Done: {}".format(reward, done))
    rewards = np.append(rewards, [reward])
    if done:
        total_reward = np.sum(rewards)
        print("Well done! You reached the goal.\nTotal reward: " +
              str(total_reward))
        break
# Example 3
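# Assumed setup for this example; the import path for ArmEnvironment is a
# guess and may differ in the actual package layout.
import numpy as np
import rospy
from arm_environment import ArmEnvironment  # hypothetical module path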
env = ArmEnvironment(static_goal=True)
env.reset()
rospy.sleep(1)
print("Goal Distance: ", env.get_goal_distance())

while True:
    # Parse a space-separated action vector, e.g. "0.1 -0.3 0 0.5".
    action = np.array([float(i) for i in input("Enter an action: ").split()])
    env.step(action)
    print("Goal Distance: ", env.get_goal_distance())
    # Avoid eval(input(...)): treat an explicit "y" as a reset request.
    if input("Reset goal? (y/n): ").strip().lower() == 'y':
        env.set_new_goal()

# alljoints.move(pos)
# rospy.sleep(3)
# env.pause_physics()
# env.unpause_physics()

# env.set_model_config()

# env.pause_physics()

# alljoints.move(np.zeros(4))