# DDPG training driver (fragment): this chunk begins mid-way through the
# agent's constructor call — only the trailing keyword arguments are visible
# here, the call head lives above this view.
tau=0.001, actor_lr=0.0001, critic_lr=0.001, use_layer_norm=True)
print('DDPG agent configured')
# Restore previously trained weights and the replay buffer so training
# resumes rather than starting cold.
agent.load_model(agent.current_path + '/model/model.ckpt')
agent.load_memory()
max_episode = 10000
tot_rewards = []
print('env reset')
observation, done = env.reset()
# Prime the loop with one action/step so `reward` is defined before the
# episode loop below first reads it.
action = agent.act(observation)
print(action)
rospy.sleep(0.8)  # give the simulator time to apply the command — TODO confirm timing
observation, reward, done = env.step(action)
rospy.sleep(0.8)
# Exploration noise schedule: sigma is halved every 100 episodes (see loop
# below) until it drops to the 0.03 floor.
noise_sigma = 0.15
save_cutoff = 1
cutoff_count = 0
save_count = 0
curr_highest_eps_reward = -1000.0  # best episode reward seen so far
for i in xrange(max_episode):
    # Periodically re-create the Ornstein-Uhlenbeck noise process with a
    # smaller sigma to anneal exploration.
    if i % 100 == 0 and noise_sigma > 0.03:
        agent.noise = OUNoise(agent.nb_actions, sigma=noise_sigma)
        noise_sigma /= 2.0
    step_num = 0
    # Run one episode until the environment reports done.
    # NOTE(review): this chunk is truncated here — the rest of the episode
    # loop (and any model saving using save_cutoff/save_count) is outside view.
    while done == False:
        step_num += 1
        action = agent.step(observation, reward, done)
        observation, reward, done = env.step(action)
# TODO fix reset function
# Manual teleoperation loop for the `envboi` environment: the user types a
# comma-separated action vector each step, the per-step reward is shown, and
# rewards accumulate until the episode reports done, then the total prints.
# (Dead experiment code that was commented out here has been removed.)
rewards = []
while True:
    current_distance = envboi.get_goal_distance()
    print("Current distance is: " + str(current_distance))
    # Parse a comma-separated action vector, e.g. "0.1,-0.5,0.3".
    raw = raw_input("Enter an action from -1 to 1: \n")
    action = np.array([float(tok) for tok in raw.split(',')])
    print(action)
    (state, reward, done) = envboi.step(action)
    # BUG FIX: the original built this message but never printed it — the
    # bare ".format(...)" expression was a no-op.
    print("Reward is {}, Done: {}".format(reward, done))
    # Accumulate in a plain list instead of np.append(), which copies the
    # whole array on every step (accidental O(n^2)).
    rewards.append(reward)
    if done:
        total_rewards = str(np.sum(rewards))
        print("Well done! you reached the goal \n Total reward: " + total_rewards)
        break
# Interactive smoke test for ArmEnvironment: type a space-separated action
# vector each iteration, watch the goal distance change, and optionally
# assign a new goal. (Dead commented-out joint-mover code removed.)
env = ArmEnvironment(static_goal=True)
env.reset()
rospy.sleep(1)  # let the environment settle after reset
print("Goal Distance: ", env.get_goal_distance())
while True:
    tokens = input("Enter an action:").split(' ')
    # Convert every token uniformly. The original special-cased '0' and
    # appended an int 0, which only affected the resulting array dtype
    # (int vs. float) when all entries were zero, never the values.
    action = np.array([float(tok) for tok in tokens])
    # BUG FIX: the original built the `action` array but then passed the raw
    # Python list to env.step(), leaving the array unused; pass the array.
    env.step(action)
    print("Goal Distance: ", env.get_goal_distance())
    # SECURITY NOTE(review): eval() on raw user input executes arbitrary
    # code. Tolerable in a local debug script, but replace with explicit
    # parsing (e.g. answer.strip().lower() in ('y', 'yes', 'true', '1'))
    # before this pattern is reused anywhere untrusted.
    reset = eval(input("Reset goal?"))
    if reset:
        env.set_new_goal()