# DDPG training driver (fragment).
# NOTE(review): this whole script has been collapsed onto a single physical line,
# which is not valid Python as written (the `for`/`if` bodies cannot nest on one
# line) — the original nesting of `noise_sigma /= 2.0` and `step_num = 0` cannot
# be recovered from here, and the training-loop body appears truncated at the
# chunk boundary. Kept verbatim; restore from the original file before running.
#
# What the statements do, in order:
#  - build a DDPG agent (batch 128, gamma 0.995, soft-update tau 0.001,
#    actor/critic learning rates 1e-4 / 1e-3, layer norm enabled), then restore
#    weights from <current_path>/model/model.ckpt and reload the replay memory;
#  - warm up the environment: reset, take one agent action, step once with
#    0.8 s rospy sleeps around the step — presumably to let Gazebo settle
#    (TODO confirm);
#  - training loop over up to 10000 episodes; every 100 episodes, while
#    noise_sigma > 0.03, the exploration OU-noise is rebuilt with the current
#    sigma and sigma is halved (exploration annealing).
agent = DDPG(state_shape, action_shape, batch_size=128, gamma=0.995, tau=0.001, actor_lr=0.0001, critic_lr=0.001, use_layer_norm=True) print('DDPG agent configured') agent.load_model(agent.current_path + '/model/model.ckpt') agent.load_memory() max_episode = 10000 tot_rewards = [] print('env reset') observation, done = env.reset() action = agent.act(observation) print(action) rospy.sleep(0.8) observation, reward, done = env.step(action) rospy.sleep(0.8) noise_sigma = 0.15 save_cutoff = 1 cutoff_count = 0 save_count = 0 curr_highest_eps_reward = -1000.0 for i in xrange(max_episode): if i % 100 == 0 and noise_sigma > 0.03: agent.noise = OUNoise(agent.nb_actions, sigma=noise_sigma) noise_sigma /= 2.0 step_num = 0
"""Manual smoke-test script for ArmEnvironment.

Constructs the Gazebo arm environment, resets it, and keeps two canned
joint-position vectors around for ad-hoc motion tests.

Fixes versus the original: the whole script had been collapsed onto one
physical line (invalid Python) — restored to one statement per line — and
the large tail of commented-out experiment code has been deleted, keeping
only the outstanding TODO.
"""
import rospy
from arm_env import ArmEnvironment, AllJoints
import position_command
import actionlib
from trajectory_msgs.msg import JointTrajectory, JointTrajectoryPoint
from control_msgs.msg import JointTrajectoryControllerState, FollowJointTrajectoryAction, FollowJointTrajectoryActionGoal, FollowJointTrajectoryGoal
from std_srvs.srv import Empty
from sensor_msgs.msg import JointState
from gazebo_msgs.srv import SetModelState, SetModelStateRequest, SetModelConfiguration, SetModelConfigurationRequest
from gazebo_msgs.srv import GetModelState, GetModelStateRequest
from gazebo_msgs.msg import ModelState
import numpy as np

# No rospy.init_node() here — presumably ArmEnvironment() initializes the
# node itself (the original had an init_node call commented out); verify
# against arm_env before reusing this pattern.
envboi = ArmEnvironment()
envboi.reset()

# Canned 4-DOF joint targets ('plat', 'shoulder', 'forearm', 'wrist' —
# per the joint names used in the original commented-out AllJoints call).
# Currently unused by the live code; kept for interactive experimentation.
zero = np.array([0, 0, 0, 0])
testpos = np.array([0, 0.9, 0.5, 1.5])

# TODO fix reset function
"""Interactive test driver for ArmEnvironment.

Repeatedly reads a space-separated joint action from stdin, steps the
environment with it, prints the resulting goal distance, and optionally
resamples the goal.

Fixes versus the original:
  * the script had been collapsed onto one physical line — restored;
  * BUG: the parsed action was converted to a numpy array (`action`) but
    the raw Python list `floatarr` was passed to env.step(); the array is
    now passed, consistent with how env.step() is called elsewhere in this
    project (the DDPG driver passes the agent's ndarray action);
  * dead commented-out code at the tail deleted.
"""
import rospy
from arm_env import ArmEnvironment, AllJoints
import numpy as np

rospy.init_node("test")
env = ArmEnvironment(static_goal=True)
env.reset()
rospy.sleep(1)  # let the simulated arm settle after reset before reading state
print("Goal Distance: ", env.get_goal_distance())

while True:
    tokens = input("Enter an action:").split(' ')
    floatarr = []
    for tok in tokens:
        # Keep the original's special case: a literal '0' token is stored
        # as int 0 rather than float 0.0.
        if tok == '0':
            floatarr.append(0)
        else:
            floatarr.append(float(tok))
    action = np.array(floatarr)
    env.step(action)  # was env.step(floatarr): `action` was built but unused
    print("Goal Distance: ", env.get_goal_distance())
    # SECURITY NOTE: eval() on raw user input — acceptable only for this
    # local debugging script; use ast.literal_eval (or parse y/n) if this
    # ever runs on untrusted input.
    reset = eval(input("Reset goal?"))
    if reset:
        env.set_new_goal()