Example 1
from numpy import savetxt

def main(grid_size, discount_factor, epochs, learning_rate):
    # Creates an object for using the RobotMarkovModel class
    robot_mdp = RobotMarkovModel()
    # Reads the trajectory data from the provided CSV files and returns the
    # trajectories as a NumPy array, one entry per trajectory
    trajectories = robot_mdp.generate_trajectories()
    # Finds the length of the trajectories data
    n_trajectories = len(trajectories)
    # Initialize the IRL object with the number of trajectories (3 here) and the grid size
    irl = MaxEntIRL(n_trajectories, grid_size)
    # For storing results
    filename = "/home/vignesh/PycharmProjects/dvrk_automated_suturing/data/trial4_grid11_parallel/weights_grid11.txt"
    # Calculates the reward function based on the Max Entropy IRL algorithm
    reward, weights = irl.max_ent_irl(trajectories, discount_factor,
                                      n_trajectories, epochs, learning_rate,
                                      filename)

    print "r is ", reward.reshape((grid_size, grid_size), order='F')
    # print "r shape ", reward.shape
    print "weights is ", weights.reshape((grid_size, grid_size), order='F')
    # print "policy is ", policy[0][0]
    file_open = open(filename, 'a')
    file_open.write("\n \n \n \n Final result \n")
    savetxt(filename, weights, delimiter=',', fmt="%10.5f", newline=", ")
    file_open.close()
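
The MaxEntIRL class used above is project-specific and not shown here. As a rough orientation only, below is a minimal sketch of the maximum-entropy gradient step such a max_ent_irl method typically performs; the function name, the softmax stand-in for the state-visitation computation, and all array shapes are assumptions, not the project's actual implementation.

import numpy as np

def max_ent_irl_sketch(feature_matrix, expert_feature_sum, epochs, learning_rate):
    # feature_matrix: (n_states, k); expert_feature_sum: (k,) -- hypothetical shapes
    weights = np.random.uniform(size=feature_matrix.shape[1])
    for _ in range(epochs):
        # reward is assumed linear in the state features
        rewards = feature_matrix.dot(weights)
        # stand-in for the soft value-iteration / visitation-frequency step:
        # a softmax over state rewards serves as a crude visitation distribution
        probs = np.exp(rewards - rewards.max())
        probs /= probs.sum()
        expected_features = feature_matrix.T.dot(probs)
        # MaxEnt IRL gradient: expert feature counts minus expected feature counts
        weights += learning_rate * (expert_feature_sum - expected_features)
    return feature_matrix.dot(weights), weights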
Example 2
def main(discount, epochs, learning_rate, weights, trajectory_length,
         n_policy_iter):
    # Creates an object for using the RobotMarkovModel class
    robot_mdp = RobotMarkovModel()
    # Initialize the IRL object with the trajectory length (1 in this setup)
    irl = MaxEntIRL(trajectory_length)

    # trajectory_features_array, trajectory_rewards_array = robot_mdp.trajectories_features_rewards_array(weights)
    # Computes the summed features of the expert trajectory and the complete
    # list of features along the expert trajectory
    sum_trajectory_features, complete_features_array = robot_mdp.generate_trajectories()
    # print "traj array ", trajectory_features_array.shape, trajectory_features_array
    # print "complete array ", complete_features_array.shape, complete_features_array
    # print trajectory_rewards_array
    # print model_rot_par_r
    # Returns the state and action sequences of the expert trajectory
    state_trajectory_array, action_trajectory_array = robot_mdp.trajectories_data()
    # Finds the length of the trajectories data
    n_trajectories = len(state_trajectory_array)
    # Calculates the reward function based on the Max Entropy IRL algorithm
    reward, alpha = irl.max_ent_irl(sum_trajectory_features,
                                    complete_features_array, discount,
                                    n_trajectories, epochs, learning_rate,
                                    n_policy_iter, weights)

    print "r is ", reward
    print "alpha is ", alpha
Example 3
def main(grid_size, discount_factor, epochs, learning_rate):
    # Creates an object for using the RobotMarkovModel class
    robot_mdp = RobotMarkovModel()
    # Reads the trajectory data from the provided CSV files and returns the
    # trajectories as a NumPy array, one entry per trajectory
    trajectories = robot_mdp.generate_trajectories()
    # Finds the length of the trajectories data
    n_trajectories = len(trajectories)
    # Initialize the IRL object with the number of trajectories (3 here) and the grid size
    irl = MaxEntIRL(n_trajectories, grid_size)
    # Calculates the reward function based on the Max Entropy IRL algorithm
    reward, weights = irl.max_ent_irl(trajectories, discount_factor, n_trajectories, epochs, learning_rate)

    print "r is ", reward.reshape((grid_size, grid_size), order='F')
    # print "r shape ", reward.shape
    print "weights is ", weights.reshape((grid_size, grid_size), order='F')
Example 4
def main(discount, epochs, learning_rate):
    # Creates an object for using the RobotMarkovModel class
    robot_mdp = RobotMarkovModel()
    # Computes the summed features of the expert trajectory and the feature arrays of all trajectories
    sum_trajectory_features, feature_array_all_trajectories = robot_mdp.generate_trajectories()
    # Finds the length of the trajectories data
    n_trajectories = len(feature_array_all_trajectories)
    # Initialize the IRL object with the number of trajectories (1 in this setup)
    irl = MaxEntIRL(n_trajectories)
    # Calculates the reward function based on the Max Entropy IRL algorithm
    reward, weights = irl.max_ent_irl(sum_trajectory_features, feature_array_all_trajectories, discount,
                                      n_trajectories, epochs, learning_rate)

    print "r is ", reward
    # print "r shape ", reward.shape
    print "weights is ", weights
Example 5
        return V, policy


import numpy as np

if __name__ == '__main__':
    # goal = np.array([0.005, 0.055, -0.125])
    goal = np.array([0.5, 0.5, 0])
    # term_state = np.random.randint(0, grid_size ** 3)
    env_obj = RobotStateUtilsP(11, 0.9, goal)
    states = env_obj.create_state_space_model_func()
    action = env_obj.create_action_set_func()
    print "State space created is ", states
    print "actions is ", action
    index_val = env_obj.get_state_val_index(goal)
    print "index val is ", index_val
    mdp_obj = RobotMarkovModel()
    trajectories = mdp_obj.generate_trajectories()
    index_vals = np.zeros(len(trajectories[0]))
    rewards = np.zeros(len(states))
    for i in range(len(trajectories[0])):
        # print "traj is ", trajectories[0][i]
        index_vals[i] = env_obj.get_state_val_index(trajectories[0][i])
    for ele in index_vals:
        if ele == index_val:
            rewards[int(ele)] = 10
        else:
            rewards[int(ele)] = 1
    env_obj.rewards = rewards
    print "rewards is ", rewards
    # initialize V(s)
    V = {}
    for state in env_obj.states:
        V[state] = 0
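
The snippet cuts off after initializing V(s); what follows it is presumably a standard value-iteration loop over states and actions. A self-contained sketch of that pattern is below, using an illustrative chain-world transition function rather than the project's RobotStateUtilsP dynamics; all names are assumptions.

import numpy as np

def value_iteration(n_states, actions, transition, rewards, gamma=0.9, tol=1e-4):
    # transition(s, a) -> next-state index; rewards is indexed by state
    V = np.zeros(n_states)
    while True:
        delta = 0.0
        for s in range(n_states):
            # Bellman backup: best one-step reward plus discounted next value
            best = max(rewards[transition(s, a)] + gamma * V[transition(s, a)]
                       for a in actions)
            delta = max(delta, abs(best - V[s]))
            V[s] = best
        if delta < tol:
            return V

# toy 1-D chain with 5 states: actions move left/right, reward 10 at state 4
V = value_iteration(5, [-1, 1],
                    lambda s, a: min(max(s + a, 0), 4),
                    np.array([0.0, 0.0, 0.0, 0.0, 10.0]))
print(V)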