# NOTE(review): the two lines below are a whitespace-mangled paste — an entire
# chunk of a planning script collapsed onto single physical lines, interleaved
# with '''-commented-out earlier versions of the same code.  The triple-quote
# that ends the first line pairs with one on the second line, so the text
# cannot be safely re-indented without the missing surrounding context (note
# the dangling `else:` with no visible `if`).  Left byte-identical; restore
# from version control rather than hand-repairing.
#
# What the code appears to do (hedged — confirm against the original file):
#  - builds a GridWorldMDP from map_struct (seed map, goal, start, bridge
#    probabilities/locations) and seeds value iteration with the Euclidean
#    distance to the goal state, keyed by each state's raw bytes (tostring());
#  - runs MDP.value_iteration(value=init_value, plot=True, world_size=50);
#  - wires the resulting value function into a dubins_astar heuristic and an
#    AStar instance over motion_primitives, and packs the vehicle state as
#    np.array([x, y, theta]).
#
# BUGS in the live tail of the second line (the `following_dist` look-ahead
# loop), to fix once the file is un-mangled:
#  - `while following_dist < dub.look_ahead_dist` is missing its trailing `:`
#  - `path_diff = numpy.array([,])` is invalid syntax — the intended
#    difference of consecutive path points was lost; TODO reconstruct
#  - `numpy.array` is inconsistent with the `np` alias used everywhere else
grid_mdp = GridWorldMDP(map_struct['seed_map'], map_struct['goal'], map_struct['start'], map_struct['bridge_probabilities'], map_struct['bridge_locations']) init_value = {} for s in grid_mdp.states: init_value[s.tostring()] = np.linalg.norm( s - grid_mdp.goal_state) mdp = MDP(grid_mdp.states, grid_mdp.valid_actions_function, grid_mdp.cost_function) #value_fcn = mdp.value_iteration(value = value_fcn, plot=True, world_size = 50) value_fcn = mdp.value_iteration(value=init_value, plot=True, world_size=50) #set up dubins astar dub = dubins_astar(world_points, value_fcn) astar = AStar(motion_primitives, dub.cost_function, dub.heuristic, dub.valid_edge, dub.state_equality, plot=False) astar_state = np.array( [state['x'], state['y'], state['theta']]) else: '''
#set up grid world mdp ''' grid_mdp = GridWorldMDP(map_struct['seed_map'], map_struct['goal']) ''' grid_mdp = GridWorldMDP(map_struct['seed_map'], map_struct['goal'], map_struct['start'], map_struct['bridge_probabilities'], map_struct['bridge_locations']) init_value = {} for s in grid_mdp.states: init_value[s.tostring()] = np.linalg.norm(s - grid_mdp.goal_state) mdp = MDP(grid_mdp.states, grid_mdp.valid_actions_function, grid_mdp.cost_function) #value_fcn = mdp.value_iteration(value = value_fcn, plot=True, world_size = 50) value_fcn = mdp.value_iteration(value = init_value, plot=True, world_size = 50) #set up dubins astar dub = dubins_astar(world_points, value_fcn) astar = AStar(motion_primitives, dub.cost_function, dub.heuristic, dub.valid_edge, dub.state_equality, plot = False) astar_state = np.array([state['x'],state['y'],state['theta']]) else: ''' following_dist = 0.0 temp_idx = dub.last_idx while following_dist < dub.look_ahead_dist temp_idx -= 1 path_diff = numpy.array([,]) following_dist += np.linalg.norm(path_diff)
# NOTE(review): this chunk was whitespace-mangled (collapsed onto one physical
# line, which is a SyntaxError).  Reformatted into valid Python with the
# original statements, names, and order preserved exactly.

def valid_actions_function(state):
    """Return the list of actions applicable from ``state``.

    At the goal only the zero (stay-put) action is offered; elsewhere every
    action in ``actions`` whose successor state passes ``valid_state`` is
    allowed.  ``actions``, ``goal_state`` and ``valid_state`` are defined
    elsewhere in this file.
    """
    if np.array_equal(state, goal_state):
        return [np.array([0, 0])]
    else:
        return [a for a in actions if valid_state(state + a)]


def cost_function(state, action):
    """Cost of taking ``action`` from ``state``: the Euclidean move length."""
    return np.linalg.norm(action)


mdp = MDP(states, valid_actions_function, cost_function, converge_thr=1)

# Earlier value_iteration experiments, kept for reference:
#V, pi = mdp.value_iteration(policy = init_policy, plot = True, world_size = world_size)
#V, pi = mdp.value_iteration(policy = init_policy, value = init_value, plot = True, world_size = world_size)
#V, pi = mdp.value_iteration(policy = init_policy)
#V, pi = mdp.value_iteration(policy = init_policy, value = init_value)
#V, pi = mdp.value_iteration(value = init_value, plot = True, world_size = world_size)
V, pi = mdp.value_iteration(value=init_value)
#V, pi = mdp.value_iteration(plot = True, world_size = world_size)
#V, pi = mdp.value_iteration()

# Unpack the value function V and greedy policy pi into dense arrays /
# coordinate lists for plotting.  States are keyed by their raw-byte
# representation (ndarray.tostring(), as elsewhere in this file; note
# tostring() is deprecated in modern NumPy in favour of tobytes()).
value_mat = np.zeros((world_size, world_size))
Sx = []
Sy = []
Ax = []
Ay = []
for s in states:
    value_mat[s[0], s[1]] = V[s.tostring()]
    Sx.append(s[0])
    Sy.append(s[1])
    Ax.append(pi[s.tostring()][0])
    Ay.append(pi[s.tostring()][1])
# NOTE(review): whitespace-mangled fragment; it begins mid-function (the bare
# `return` has no visible `def`) and the trailing '''-blocks are collapsed
# commented-out code whose closing quote pairs across the line boundary, so it
# is left byte-identical.  What the live head appears to do (hedged — confirm
# against the original file): reduce a (dx, dy) bridge offset to a scalar
# Euclidean distance, then, for every bridge within `radius`, multiply its
# open-probability into `prob_open` and keep the maximum replan cost; the
# expected cost returned is action_cost + (1 - prob_open) * replan_cost.
#
# BUG to fix when un-mangling: `i += 1` inside the `for (i, ...) in
# enumerate(...)` loop is dead code — enumerate reassigns `i` on every
# iteration, so the increment has no effect and should be deleted.
dists_to_bridge = [np.sqrt(dists_to_bridge[0]**2 + dists_to_bridge[1]**2)] prob_open = 1.0 replan_cost = 0.0 for (i, dist_to_bridge) in enumerate(dists_to_bridge): if dist_to_bridge <= radius: prob_open *= bridge_probabilities[i] if replan_costs[i] > replan_cost: replan_cost = replan_costs[i] i += 1 return action_cost + (1.0-prob_open)*replan_cost ''' mdp = MDP(states, valid_actions_function, cost_function, converge_thr=1, gamma=1) #V = mdp.value_iteration(policy = init_policy, plot = True, world_size = world_size) #V = mdp.value_iteration(policy = init_policy, value = init_value, plot = True, world_size = world_size) #V = mdp.value_iteration(policy = init_policy) #V = mdp.value_iteration(policy = init_policy, value = init_value) V = mdp.value_iteration(value=init_value, plot=True, world_size=world_size) #V = mdp.value_iteration(value = init_value) #V = mdp.value_iteration(plot = True, world_size = world_size) #V = mdp.value_iteration() ''' with open(map_name +'value.pickle', 'wb') as handle: pickle.dump(V, handle) '''
dists_to_bridge = np.sqrt(dists_to_bridge[:,0]**2 + dists_to_bridge[:,1]**2) else: dists_to_bridge = state - bridge_locations dists_to_bridge = [np.sqrt(dists_to_bridge[0]**2 + dists_to_bridge[1]**2)] prob_open = 1.0 replan_cost = 0.0 for (i, dist_to_bridge) in enumerate(dists_to_bridge): if dist_to_bridge <= radius: prob_open *= bridge_probabilities[i] if replan_costs[i] > replan_cost: replan_cost = replan_costs[i] i += 1 return action_cost + (1.0-prob_open)*replan_cost ''' mdp = MDP(states, valid_actions_function, cost_function, converge_thr = 1, gamma = 1) #V = mdp.value_iteration(policy = init_policy, plot = True, world_size = world_size) #V = mdp.value_iteration(policy = init_policy, value = init_value, plot = True, world_size = world_size) #V = mdp.value_iteration(policy = init_policy) #V = mdp.value_iteration(policy = init_policy, value = init_value) V = mdp.value_iteration(value = init_value, plot = True, world_size = world_size) #V = mdp.value_iteration(value = init_value) #V = mdp.value_iteration(plot = True, world_size = world_size) #V = mdp.value_iteration() ''' with open(map_name +'value.pickle', 'wb') as handle: pickle.dump(V, handle) '''
# NOTE(review): the line above is a near-duplicate of the previous mangled
# fragment, differing only in a vectorized branch — the first statement
# handles a 2-D array of bridge offsets (per-row Euclidean norm via
# dists_to_bridge[:,0/1]) while its `else:` branch (whose matching `if` is
# not visible here) handles a single (dx, dy) offset.  It begins mid-function
# and starts inside the ''' string opened at the end of the previous line, so
# no comment could safely precede it; left byte-identical.  The same dead
# `i += 1` inside the enumerate loop should be deleted when un-mangling, and
# the two duplicate fragments should be merged into one function.