def main():
    q_value = list()
    env = environment()
    env.start_simulation()
    env.set_goal(-2, -2)
    env.update()
    RL_solver = rl(ACTION_SPACE_LENGTH, STATE_SPACE_LENGTH)
    RL_solver.set_temperature(1.0)
    weight_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir,
                               'Weights')
    if not os.path.exists(weight_path):
        print('[ERROR] Weights path %s does not exist, saving weights to current path' % weight_path)
        weight_path = os.getcwd()
    if READ_WEIGHTS:
        RL_solver.load_q(weight_path)
    #for action in range(ACTION_SPACE_LENGTH):
    for action in range(1):
        for i_episoide in range(NUM_EPISOIDE):
            print('Start episode %d for action %d' % (i_episoide, action))
            env.robot_go_initial()
            env.update()
            RL_solver.rl_init(env.states)
            pre_qvalue = RL_solver.q[env.states, action]
            print("pre_qvalue:", pre_qvalue)
            finish_flag = False
            for t in range(100):
                # repeat the same action and track how its Q estimate evolves
                env.action = action
                env.take_action()
                env.update()
                reward = env.get_reward()
                RL_solver.action_prev = action
                RL_solver.action_aft_policy = action
                RL_solver.q_learning(env.states, reward)
                curr_qvalue = RL_solver.q[env.states, action]
                q_value.append(curr_qvalue)
                print("curr_qvalue:", curr_qvalue)
                # stop once the Q estimate for this action has converged
                if abs(curr_qvalue - pre_qvalue) < 0.03:
                    finish_flag = True
                    break
                pre_qvalue = curr_qvalue
                isSuccess = (env.distance < 0.5)
                if env.is_incline:
                    print('incline too much, episode stop')
                    break
                if isSuccess:
                    print('goal reached, episode stop')
                    break
            env.stop_simulation()
            print('episode %d execution finished' % (i_episoide))
            #RL_solver.save_q(weight_path)
            time.sleep(1)
            env.start_simulation()
            if finish_flag:
                print("curr_qvalue:", curr_qvalue)
                break
    env.stop_simulation()
    env.stop_client()
    #RL_solver.save_q(weight_path)
    plt.xlabel("t")
    plt.ylabel("Q value")
    plt.plot(q_value)
    plt.show()
    print('[INFO] Program ended')
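# --- Illustrative sketch, not part of the original script ---
# The loop above stops when successive Q estimates for the fixed action
# differ by less than 0.03. A minimal tabular Q-learning update of the kind
# rl.q_learning is assumed to perform looks like this; the class name,
# alpha, gamma, and the bookkeeping attributes are assumptions, not the
# actual rl API:
import numpy as np

class TabularQSketch:
    def __init__(self, n_states, n_actions, alpha=0.1, gamma=0.9):
        self.q = np.zeros((n_states, n_actions))  # Q indexed as q[state, action]
        self.alpha, self.gamma = alpha, gamma
        self.state_prev = 0
        self.action_prev = 0

    def q_learning(self, state_aft, reward):
        # one-step backup toward the greedy value of the successor state:
        # Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        td_target = reward + self.gamma * np.max(self.q[state_aft])
        self.q[self.state_prev, self.action_prev] += self.alpha * (
            td_target - self.q[self.state_prev, self.action_prev])
        self.state_prev = state_aft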
def main():
    env = environment()
    env.start_simulation()
    env.update()
    RL_solver = rl(ACTION_SPACE_LENGTH, len(env.get_features()))
    # resolve the weights directory up front so the periodic save inside the
    # episode loop always has a valid path, even when READ_WEIGHTS is False
    weight_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir,
                               'Weights')
    if not os.path.exists(weight_path):
        print('[ERROR] Weights path %s does not exist, saving weights to current path' % weight_path)
        weight_path = os.getcwd()
    if READ_WEIGHTS:
        RL_solver.load_weights(weight_path)
    start_sequence = [4, 4, 70, 70, 10, 10, 22, 22]
    process_sequence = [
        58, 58, 16, 16, 16, 16, 64, 64, 64, 64, 76, 76,
        4, 4, 70, 70, 70, 70, 10, 10, 10, 10, 22, 22
    ]
    for i_episoide in range(NUM_EPISOIDE):
        print('Start episode %d' % (i_episoide))
        # at the beginning of each episode, send the robot to its home pose,
        # then read the current joint positions and the robot position and
        # orientation
        env.robot_go_initial()
        RL_solver.rl_init(env.features)
        # play the start sequence once, then loop the process sequence forever
        sequence = chain(start_sequence, cycle(process_sequence))
        starttime = time.time()
        env.update()
        for t in range(10000):
            RL_solver.action_prev = next(sequence)
            env.action = RL_solver.action_prev
            env.take_action()
            #print(time.time() - starttime)
            # pace the loop to a fixed 50 ms control period
            time.sleep(0.05 - ((time.time() - starttime) % 0.05))
            starttime = time.time()
            env.update()
            isSuccess = (abs(env.curr_robot_position[0]) > 1
                         or abs(env.curr_robot_position[1]) > 1)
            isFail = (t == 9999)
            reward = env.get_reward2()
            #print(reward)
            RL_solver.q_learning(env.features, reward)
            if (abs(env.curr_robot_orientation[0]) > 3
                    or abs(env.curr_robot_orientation[1]) > 3):
                print('incline too much, episode stop')
                break
            if isSuccess:
                print('goal reached, episode stop')
                break
            if isFail:
                print('failed to reach goal, episode end')
        env.stop_simulation()
        print('episode %d execution finished' % (i_episoide))
        if i_episoide % 500 == 499:
            RL_solver.save_weights(weight_path)
        time.sleep(1)
        env.start_simulation()
        #print('body position', get_body_position_streaming(clientID, Body_Handle))
    env.stop_simulation()
    env.stop_client()
    RL_solver.save_weights(weight_path)
    print('[INFO] Program ended')
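# --- Illustrative sketch, not part of the original script ---
# Standalone demo of the two idioms used in main() above: building an
# endless gait iterator with itertools.chain/cycle, and keeping the control
# loop on a fixed 50 ms grid with sleep(period - (elapsed % period)) so
# timing drift does not accumulate. The short sequences are placeholders.
import time
from itertools import chain, cycle

start_sequence = [4, 4, 70, 70]
process_sequence = [58, 58, 16, 16]
sequence = chain(start_sequence, cycle(process_sequence))

period = 0.05  # one control step every 50 ms
starttime = time.time()
for _ in range(8):
    print(next(sequence))
    # sleep only for the remainder of the current 50 ms slot
    time.sleep(period - ((time.time() - starttime) % period))
    starttime = time.time()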
import numpy as np
import Kinematics.Kinematics_firmware as kin
from Vrep.vrep_simulation import obstacle, environment
import time

env = environment()
env.start_simulation()
env.update()
env.robot_go_initial()
env.update()

# foot-trajectory geometry parameters
dx = 50.0
dy = 50.0
alt = 15.0
radius = 153.0
x_ = np.cos(np.pi / 4) * radius
y_ = np.cos(np.pi / 4) * radius

kin_solver = kin.Kinematics()
# home pose: four legs, three joint angles each, given in degrees and
# converted to radians
initial_joint_position = np.array([
    0.0, -29.76, -123.87,
    0.0, -29.76, -123.87,
    0.0, -29.76, -123.87,
    0.0, -29.76, -123.87
]) * np.pi / 180
joint_position = list(initial_joint_position)

for step in np.arange(1.0, 1.2, 0.2):
    for rise in np.arange(1.0, 6.0, 0.8):
        print("step=", step)
        print("rise=", rise)
        print("")
        print("")
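# --- Illustrative note, not part of the original script ---
# np.arange with a float step can over- or under-shoot its endpoint because
# of floating-point rounding. The same step/rise grid as above can be
# written with np.linspace, which takes an explicit sample count instead:
import numpy as np

for step in np.linspace(1.0, 1.0, 1):      # arange(1.0, 1.2, 0.2) yields [1.0]
    for rise in np.linspace(1.0, 5.8, 7):  # arange(1.0, 6.0, 0.8) yields 7 values
        print("step=", step, "rise=", rise)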
def main():
    env = environment()
    env.start_simulation()
    env.set_goal(-1, -1)
    env.update()
    RL_solver = rl(ACTION_SPACE_LENGTH, STATE_SPACE_LENGTH)
    RL_solver.set_temperature(1.0)
    weight_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir,
                               'Weights')
    if not os.path.exists(weight_path):
        print('[ERROR] Weights path %s does not exist, saving weights to current path' % weight_path)
        weight_path = os.getcwd()
    if READ_WEIGHTS:
        RL_solver.load_q(weight_path)
    reward_hist = []
    for i_episoide in range(NUM_EPISOIDE):
        print('Start episode %d' % (i_episoide))
        env.robot_go_initial()
        env.update()
        RL_solver.rl_init(env.states)
        for t in range(1000):
            env.action = RL_solver.action_prev
            #env.action = 0
            env.take_action()
            env.update()
            reward = env.get_reward()
            reward_hist.append(reward)
            '''
            if RL_solver.state_prev >= 880:
                RL_solver.state_aft = env.states
                RL_solver.make_policy_aft()
                RL_solver.state_prev = RL_solver.state_aft
                RL_solver.action_prev = RL_solver.action_aft_policy
            else:
            '''
            RL_solver.q_learning(env.states, reward)
            isSuccess = (env.distance < 0.05)
            if env.is_incline:
                print('incline too much, episode stop')
                break
            if isSuccess:
                print('goal reached, episode stop')
                break
        env.stop_simulation()
        print('episode %d execution finished' % (i_episoide))
        RL_solver.save_q(weight_path)
        time.sleep(1)
        env.start_simulation()
    env.stop_simulation()
    env.stop_client()
    RL_solver.save_q(weight_path)
    plt.plot(reward_hist)
    plt.show()
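# --- Illustrative sketch, not part of the original script ---
# set_temperature(1.0) above suggests a Boltzmann (softmax) exploration
# policy over the Q row of the current state. A minimal version could look
# like this; the function name and signature are illustrative, not the
# actual rl API:
import numpy as np

def softmax_action(q_row, temperature=1.0):
    # subtract the max before exponentiating for numerical stability
    prefs = (np.asarray(q_row) - np.max(q_row)) / temperature
    probs = np.exp(prefs) / np.sum(np.exp(prefs))
    return np.random.choice(len(probs), p=probs)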
def main():
    env = environment()
    env.start_simulation()
    env.set_goal(-2, -2)
    env.update()
    experience_pool = []
    # NOTE: exp_pool_path is defined but unused below; the pool is currently
    # saved under weight_path instead
    exp_pool_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir,
                                 'experience')
    exp_num = 0
    RL_solver = rl(ACTION_SPACE_LENGTH, STATE_SPACE_LENGTH)
    RL_solver.set_temperature(1.0)
    weight_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir,
                               'Weights')
    if not os.path.exists(weight_path):
        print('[ERROR] Weights path %s does not exist, saving weights to current path' % weight_path)
        weight_path = os.getcwd()
    if READ_WEIGHTS:
        RL_solver.load_q(weight_path)
    for i_episoide in range(NUM_EPISOIDE):
        print('Start episode %d' % (i_episoide))
        env.robot_go_initial()
        env.update()
        RL_solver.rl_init(env.states)
        for t in range(1000):
            env.action = RL_solver.action_prev
            curr_experience = [env.states, env.action]
            env.take_action()
            env.update()
            reward = env.get_reward()
            RL_solver.state_aft = env.states
            RL_solver.make_policy_aft()
            RL_solver.q_learning(env.states, reward)
            # store the transition as a SARSA tuple: (s, a, r, s', a')
            curr_experience += [reward, env.states, RL_solver.action_aft_policy]
            experience_pool.append(curr_experience)
            exp_num += 1
            '''
            if RL_solver.state_prev == 880:
                RL_solver.state_aft = env.states
                RL_solver.make_policy_aft()
                RL_solver.state_prev = RL_solver.state_aft
                RL_solver.action_prev = RL_solver.action_aft_policy
            else:
            '''
            isSuccess = (env.distance < 0.05)
            if env.is_incline:
                print('incline too much, episode stop')
                break
            if isSuccess:
                print('goal reached, episode stop')
                break
            print("exp_len: ", len(experience_pool))
            print("current sarsa: ", curr_experience)
            if exp_num >= EXP_MAX:
                break
        if exp_num >= EXP_MAX:
            file_path = os.path.join(weight_path, "experience.pkl")
            with open(file_path, 'wb') as writefile:
                cPickle.dump(experience_pool, writefile)
            print('[INFO] Successfully saved experience to %s' % (file_path))
            break
        env.stop_simulation()
        print('episode %d execution finished' % (i_episoide))
        RL_solver.save_q(weight_path)
        time.sleep(1)
        env.start_simulation()
    env.stop_simulation()
    env.stop_client()
    # train a fresh Q table offline by replaying the collected experience
    RL_solver = rl(ACTION_SPACE_LENGTH, STATE_SPACE_LENGTH)
    if TRAIN_ON_EXP:
        for _ in tqdm.trange(10000):
            for idx, sarsa in enumerate(experience_pool):
                RL_solver.state_prev = sarsa[0]
                RL_solver.action_prev = sarsa[1]
                reward = sarsa[2]
                states_ = sarsa[3]
                RL_solver.action_aft_policy = sarsa[4]
                RL_solver.q_learning(states_, reward)
        RL_solver.save_q(weight_path)
    print('[INFO] Program ended')
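# --- Illustrative sketch, not part of the original script ---
# main() saves the experience pool but never reloads it; a later offline run
# would read it back like this. The standard-library pickle module is assumed
# to be compatible with the cPickle dump above, and the path mirrors the
# weight_path construction in main():
import os
import pickle

weight_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir,
                           'Weights')
file_path = os.path.join(weight_path, "experience.pkl")
with open(file_path, 'rb') as readfile:
    # list of [state, action, reward, next_state, next_action] entries
    experience_pool = pickle.load(readfile)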