def main():
    q_value = list()
            
    env = environment()
    env.start_simulation()
    env.set_goal(-2, -2)
    env.update()
    
    RL_solver = rl(ACTION_SPACE_LENGTH, STATE_SPACE_LENGTH)
    RL_solver.set_temperature(1.0)
    weight_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir, 'Weights')
    if not os.path.exists(weight_path):
        print('[ERROR] Weights path %s does not exist, saving weights to the current path' % weight_path)
        weight_path = os.getcwd()
    
    if READ_WEIGHTS:
        RL_solver.load_q(weight_path)
        
    #for action in range(ACTION_SPACE_LENGTH):
    for action in range(1):
        for i_episoide in range(NUM_EPISOIDE):
            print('Start episode %d for action %d' % (i_episoide, action))
            env.robot_go_initial()
            env.update()
            RL_solver.rl_init(env.states)
            pre_qvalue = RL_solver.q[env.states, action]
            print("pre_qvalue:", pre_qvalue)
            finish_flag = False
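            # Run up to 100 steps with this fixed action, updating the Q value after each step.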
        
            for t in range(100):
                env.action = action
                env.take_action()
                env.update()
                reward = env.get_reward()
                RL_solver.action_prev = action
                RL_solver.action_aft_policy = action
                RL_solver.q_learning(env.states, reward)
                curr_qvalue = RL_solver.q[env.states, action]
                q_value.append(curr_qvalue)
                print("curr_qvalue:", curr_qvalue)
                if abs(curr_qvalue - pre_qvalue) < 0.03:
                    finish_flag = True
                    break
                pre_qvalue = curr_qvalue
            
                isSuccess = (env.distance < 0.5)
                if env.is_incline:
                    print('inclined too much, episode stopped')
                    break
                if isSuccess:
                    print('goal reached, episode stopped')
                    break
                
            env.stop_simulation()
            print('episode %d execution finished' % (i_episoide))
            #RL_solver.save_q(weight_path)
            time.sleep(1)
            env.start_simulation()
            if finish_flag:
                print("curr_qvalue:", curr_qvalue)
                break
        
        
    env.stop_simulation()
    env.stop_client()
    #RL_solver.save_q(weight_path)
    
    plt.xlabel("t")
    plt.ylabel("Q value")
    plt.plot(q_value)
    plt.show()
    print ('[INFO] Program ended')
Example #2
def main():

    env = environment()
    env.start_simulation()
    env.update()

    RL_solver = rl(ACTION_SPACE_LENGTH, len(env.get_features()))

    # Resolve the weights directory up front so it is also available for the
    # periodic checkpoints inside the training loop.
    weight_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir,
                               'Weights')
    if not os.path.exists(weight_path):
        print('[ERROR] Weights path %s does not exist, saving weights to the current path'
              % weight_path)
        weight_path = os.getcwd()

    if READ_WEIGHTS:
        RL_solver.load_weights(weight_path)

    start_sequence = [4, 4, 70, 70, 10, 10, 22, 22]
    process_sequence = [
        58, 58, 16, 16, 16, 16, 64, 64, 64, 64, 76, 76, 4, 4, 70, 70, 70, 70,
        10, 10, 10, 10, 22, 22
    ]

    for i_episoide in range(NUM_EPISOIDE):
        print('Start episode %d' % (i_episoide))

        # At the beginning of each episode, send the robot to its home pose,
        # then read the current joint positions and the robot position/orientation.
        env.robot_go_initial()
        RL_solver.rl_init(env.features)
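        # Play the scripted start sequence once, then repeat the process sequence indefinitely.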
        sequence = chain(start_sequence, cycle(process_sequence))

        starttime = time.time()
        env.update()
        for t in range(10000):
            RL_solver.action_prev = next(sequence)
            env.action = RL_solver.action_prev
            env.take_action()
            #print(time.time() - starttime)
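            # Keep the control loop on a fixed 0.05 s period, compensating for the time spent above.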
            time.sleep(0.05 - ((time.time() - starttime) % 0.05))
            starttime = time.time()
            env.update()
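            # Success: the robot has moved more than 1 m from the origin along x or y;
            # failure: the 10000-step budget runs out.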

            isSuccess = (abs(env.curr_robot_position[0]) > 1
                         or abs(env.curr_robot_position[1]) > 1)
            isFail = (t == 9999)
            reward = env.get_reward2()
            #print(reward)
            RL_solver.q_learning(env.features, reward)
            if abs(env.curr_robot_orientation[0]) > 3 or abs(
                    env.curr_robot_orientation[1]) > 3:
                print('inclined too much, episode stopped')
                break
            if isSuccess:
                print('goal reached, episode stopped')
                break
            if isFail:
                print('failed to reach goal, episode ended')

        env.stop_simulation()
        print('episode %d execution finished' % (i_episoide))

        if i_episoide % 500 == 499:
            RL_solver.save_weights(weight_path)
        time.sleep(1)
        env.start_simulation()

    #print ('body position', get_body_position_streaming(clientID, Body_Handle))
    env.stop_simulation()
    env.stop_client()

    RL_solver.save_weights(weight_path)

    print('[INFO] Program ended')
Example #3
import numpy as np
import Kinematics.Kinematics_firmware as kin
from Vrep.vrep_simulation import obstacle, environment
import time

env = environment()
env.start_simulation()
env.update()
env.robot_go_initial()
env.update()

dx = 50.0
dy = 50.0
alt = 15.0
radius = 153.0
x_ = np.cos(np.pi / 4) * radius
y_ = np.cos(np.pi / 4) * radius
kin_solver = kin.Kinematics()

initial_joint_position = np.array([
    0.0, -29.76, -123.87, 0.0, -29.76, -123.87, 0.0, -29.76, -123.87, 0.0,
    -29.76, -123.87
]) * np.pi / 180
joint_position = list(initial_joint_position)

for step in np.arange(1.0, 1.2, 0.2):
    for rise in np.arange(1.0, 6.0, 0.8):
        print("step=", step)
        print("rise=", rise)
        print("")
        print("")
Example #4
def main():
            
    env = environment()
    env.start_simulation()
    env.set_goal(-1, -1)
    env.update()
    
    RL_solver = rl(ACTION_SPACE_LENGTH, STATE_SPACE_LENGTH)
    RL_solver.set_temperature(1.0)
    weight_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir, 'Weights')
    if not os.path.exists(weight_path):
        print('[ERROR] Weights path %s does not exist, saving weights to the current path' % weight_path)
        weight_path = os.getcwd()
    
    if READ_WEIGHTS:
        RL_solver.load_q(weight_path)

    reward_hist = []
    
    for i_episoide in range(NUM_EPISOIDE):
        print('Start episode %d' % (i_episoide))
        
        env.robot_go_initial()
        env.update()
        RL_solver.rl_init(env.states)
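        # Execute the solver's chosen action each step and update Q with the resulting reward.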
        
        for t in range(1000):
            env.action = RL_solver.action_prev
            #env.action = 0
            env.take_action()
            env.update()
            reward = env.get_reward()
            reward_hist.append(reward)
            '''
            if RL_solver.state_prev >= 880:
                RL_solver.state_aft = env.states
                RL_solver.make_policy_aft()
                RL_solver.state_prev = RL_solver.state_aft
                RL_solver.action_prev = RL_solver.action_aft_policy
            else:
            '''
            RL_solver.q_learning(env.states, reward)
            
            isSuccess = (env.distance < 0.05)
            if env.is_incline:
                print('inclined too much, episode stopped')
                break
            if isSuccess:
                print('goal reached, episode stopped')
                break
                
        env.stop_simulation()
        print('episode %d execution finished' % (i_episoide))
        

        RL_solver.save_q(weight_path)
        time.sleep(1)
        env.start_simulation()
        
    env.stop_simulation()
    env.stop_client()
    RL_solver.save_q(weight_path)

    plt.plot(reward_hist)
    plt.show()
Example #5
def main():

    env = environment()
    env.start_simulation()
    env.set_goal(-2, -2)
    env.update()

    experience_pool = []
    exp_pool_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir,
                                 'experience')
    exp_num = 0

    RL_solver = rl(ACTION_SPACE_LENGTH, STATE_SPACE_LENGTH)
    RL_solver.set_temperature(1.0)
    weight_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir,
                               'Weights')
    if not os.path.exists(weight_path):
        print(
            '[ERROR] Weights path %s does not exist, saving weights to the current path'
            % weight_path)
        weight_path = os.getcwd()

    if READ_WEIGHTS:
        RL_solver.load_q(weight_path)

    for i_episoide in range(NUM_EPISOIDE):
        print('Start episode %d' % (i_episoide))

        env.robot_go_initial()
        env.update()
        RL_solver.rl_init(env.states)

        for t in range(1000):
            env.action = RL_solver.action_prev
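            # Record a SARSA transition: (state, action) now; reward, next state,
            # and next action are appended after the step is taken.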
            curr_experience = [env.states, env.action]
            env.take_action()
            env.update()
            reward = env.get_reward()
            RL_solver.state_aft = env.states
            RL_solver.make_policy_aft()
            RL_solver.q_learning(env.states, reward)

            curr_experience += [
                reward, env.states, RL_solver.action_aft_policy
            ]
            experience_pool.append(curr_experience)
            exp_num += 1
            '''
            if RL_solver.state_prev == 880:
                RL_solver.state_aft = env.states
                RL_solver.make_policy_aft()
                RL_solver.state_prev = RL_solver.state_aft
                RL_solver.action_prev = RL_solver.action_aft_policy
            else:
            '''

            isSuccess = (env.distance < 0.05)
            if env.is_incline:
                print('inclined too much, episode stopped')
                break
            if isSuccess:
                print('goal reached, episode stopped')
                break

            print("exp_len: ", len(experience_pool))
            print("current sarsa: ", curr_experience)

            if exp_num >= EXP_MAX:
                break

        if exp_num >= EXP_MAX:
            file_path = os.path.join(weight_path, "experience.pkl")
            with open(file_path, 'wb') as writefile:
                cPickle.dump(experience_pool, writefile)
                print('[INFO] Successfully saved experience to %s' % (file_path))
            break

        env.stop_simulation()
        print('episode %d execution finished' % (i_episoide))

        RL_solver.save_q(weight_path)
        time.sleep(1)
        env.start_simulation()

    env.stop_simulation()
    env.stop_client()

    RL_solver = rl(ACTION_SPACE_LENGTH, STATE_SPACE_LENGTH)

    if TRAIN_ON_EXP:
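        # Offline replay: sweep the recorded SARSA tuples 10000 times to train a fresh Q table.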
        for _ in tqdm.trange(10000):
            for sarsa in experience_pool:
                RL_solver.state_prev = sarsa[0]
                RL_solver.action_prev = sarsa[1]
                reward = sarsa[2]
                states_ = sarsa[3]
                RL_solver.action_aft_policy = sarsa[4]
                RL_solver.q_learning(states_, reward)

        RL_solver.save_q(weight_path)

    print('[INFO] Program ended')