Example #1
import numpy as np
# Assumed import: RobotStateUtils is taken to live in robot_state_utils.py of this project
from robot_state_utils import RobotStateUtils

if __name__ == '__main__':
    # Directory holding the learnt policies (path taken from the dVRK replay script below)
    folder_dir = '/home/vignesh/PycharmProjects/dvrk_automated_suturing/learnt_policies/'
    # Load the learnt policy to replay in the grid world
    policy = np.load(folder_dir + 'misc_policy1.npy')
    # policy = np.load(folder_dir + 'misc_policy2.npy')

    # Define the starting 3D coordinate position of the agent
    starting_point = np.array([-0.025, 0.05, -0.135])
    # Define the goal position (3D coordinate) of the agent
    goal_point = np.array([0.005, 0.055, -0.125])
    # Initialize the array of states visited by the agent
    visited_states = starting_point
    # Create the object of the class which creates the state space and action space
    # Pass the required grid size, discount factor and terminal state value taken from the trajectory
    env_obj = RobotStateUtils(11, 0.1, goal_point)
    states = env_obj.create_state_space_model_func()
    action = env_obj.create_action_set_func()
    # Get the starting and goal position index values in the grid world
    start_index = env_obj.get_state_val_index(starting_point)
    goal_index = env_obj.get_state_val_index(goal_point)
    done = False
    current_state_idx = start_index
    print("policy is ", policy[current_state_idx])
    # Repeat until the agent reaches the goal state
    limit_val = 0
    while not done:
        next_state = []
        # Find the action to take based on the policy learnt
        # (cast to int because the stored policy holds one action index per state)
        action_idx = int(policy[current_state_idx])
        print("action to be taken ", action[action_idx])
        # Find the resulting state by applying the action to the current state
        # (rounding to 4 decimals is an assumption to stay on the grid coordinates)
        for i in range(0, 3):
            next_state.append(round(states[current_state_idx][i] + action[action_idx][i], 4))
        # Record the visited state and move the agent to it
        visited_states = np.vstack((visited_states, next_state))
        current_state_idx = env_obj.get_state_val_index(np.array(next_state))
        # Stop once the goal state is reached (the iteration cap is an assumed safety limit)
        limit_val += 1
        if current_state_idx == goal_index or limit_val > 1000:
            done = True
import numpy as np
from numpy import savetxt
# Assumed import: RobotStateUtils is taken to live in robot_state_utils.py of this project
from robot_state_utils import RobotStateUtils
from robot_markov_model import RobotMarkovModel

if __name__ == '__main__':
    # Location to store the computed policy
    file_name = "/home/vignesh/Desktop/individual_trials/version4/data1/policy_grid11.txt"
    # term_state = np.random.randint(0, grid_size ** 3)
    goal = np.array([0.005, 0.055, -0.125])
    # Create objects of the state-space utility and Markov model classes
    env_obj = RobotStateUtils(11, 0.01, goal)
    mdp_obj = RobotMarkovModel()
    # Store the expert trajectories
    trajectories = mdp_obj.generate_trajectories()
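    # trajectories[0] is used as the expert demonstration: a sequence of 3D points
    # that the loop below maps onto grid state indices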
    index_vals = np.zeros(len(trajectories[0]))
    for i in range(len(trajectories[0])):
        index_vals[i] = env_obj.get_state_val_index(trajectories[0][i])
    # print index_vals
    states = env_obj.create_state_space_model_func()
    action = env_obj.create_action_set_func()
    rewards = np.zeros(len(states))
    index = env_obj.get_state_val_index(goal)
    # Assign a reward of 10 at the goal state and 1 at the other states visited by the expert
    for ele in index_vals:
        if ele == index:
            rewards[int(ele)] = 10
        else:
            rewards[int(ele)] = 1

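    # value_iteration presumably returns, for every grid state, the index of the greedy
    # action to take, since the replay scripts in this example use it as policy[state_index]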
    policy = env_obj.value_iteration(rewards)
    # Append the computed policy to the output file and close it when done
    with open(file_name, 'a') as file_open:
        savetxt(file_open, policy, delimiter=',', fmt="%10.5f", newline=", ")
        file_open.write("\n \n \n \n")
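The replay scripts in this example load the policy with np.load, while the block above only appends it to a text file. A minimal addition, assuming the same policy array and an illustrative file name, would also dump it in NumPy's binary format:

    # Also save the policy as a .npy file so the replay scripts can load it with
    # np.load (the file name below is illustrative, not taken from the project)
    np.save("/home/vignesh/Desktop/individual_trials/version4/data1/policy_grid11.npy", policy)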
import numpy as np
import PyKDL
import dvrk
# Assumed import: RobotStateUtils is taken to live in robot_state_utils.py of this project
from robot_state_utils import RobotStateUtils

if __name__ == '__main__':
    # Connect to the PSM arm (the arm name 'PSM1' is an assumption) and home it
    p = dvrk.psm('PSM1')
    p.home()
    rospy_rate = 10
    # Define the starting Cartesian position
    starting_point = np.array([-0.008, 0.053, -0.134])
    # Load the policy to follow
    policy_folder_dir = '/home/vignesh/PycharmProjects/dvrk_automated_suturing/learnt_policies/'
    policy = np.load(policy_folder_dir + 'best_policy.npy')
    # Define the Cartesian goal point
    goal_point = np.array([0.005, 0.055, -0.125])
    # Create the object of the class which creates the state space and action space
    # Pass the required grid size, discount factor and terminal state value taken from the trajectory
    env_obj = RobotStateUtils(11, 0.8, goal_point)
    states = env_obj.create_state_space_model_func()
    action = env_obj.create_action_set_func()
    # Get the starting and goal position index values in the grid world
    start_index = env_obj.get_state_val_index(starting_point)
    goal_index = env_obj.get_state_val_index(goal_point)
    done = False
    current_state_idx = start_index
    # Repeat until the agent reaches the goal state
    limit_val = 0
    while not done:
        # Find the action to take based on the policy learnt
        # (index with the current state, not the start state, and cast the stored value to int)
        action_idx = int(policy[current_state_idx])
        # Find the resulting state
        next_state = states[current_state_idx] + action[action_idx]
        # Command the dVRK to move to the new position (pass each coordinate separately)
        p.move(PyKDL.Vector(float(next_state[0]), float(next_state[1]), float(next_state[2])))
        print("PSM arm new position is ", next_state[0], next_state[1], next_state[2])
        next_state_idx = env_obj.get_state_val_index(next_state)
        current_state_idx = next_state_idx
        # Stop once the goal state is reached (the iteration cap is an assumed safety limit)
        limit_val += 1
        if current_state_idx == goal_index or limit_val > 1000:
            done = True
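rospy_rate is defined above but never used; if the commanded arm moves should be paced, a rospy.Rate timer is one way to use it. A minimal sketch, assuming a ROS node is already initialised (call rospy.init_node first if the dvrk arm object has not done so):

    import rospy
    # Create the timer once, before the while loop, at rospy_rate Hz
    rate = rospy.Rate(rospy_rate)
    # ...then call rate.sleep() after each p.move(...) inside the loop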