def testTransitionHFMaxFood1(self):
    """Check the 'HF_3_1' transition on a world created with max_food=1.

    The harvester at (2, 1) moves onto the food at (3, 1).  The test expects
    the resulting grid to show the harvester carrying food ('$') at (3, 1),
    a food item at (0, 1), a reward of -1 and an action cost of 1.

    The future food is sampled through the ``random`` module, so the
    generator is seeded for reproducibility.  The seed is reset in a
    ``finally`` clause so that a failing assertion cannot leave the global
    generator in a fixed state for later tests.
    """
    random.seed(1)
    try:
        state_str = 'B---\n--HF'
        grid = problem.parse(state_str)
        action = 'HF_3_1'
        harvester_world = problem.to_problem(x=4, y=2, max_food=1)
        distances = problem.distance_to_base(grid, harvester_world)
        distances = problem.add_distance_to_food(grid, distances, harvester_world)
        # The food distribution is derived from a state without future food;
        # the sampled food is then attached to the actual initial state.
        belief_state = problem.to_state(grid, distances=distances)
        food_dist = problem.chance_of_food(belief_state, harvester_world)
        future_food = problem.sample_future_food(food_dist, n=1)
        initial_state = problem.to_state(grid, distances=distances, future_food=future_food)
        next_state, action_cost = transition(initial_state, action, harvester_world, time_left=1)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(next_state.grid, {
            (0, 1): 'F',
            (0, 0): 'B',
            (3, 1): '$',
            (2, 1): None
        }, next_state.grid)
        self.assertEqual(next_state.reward, -1, next_state.reward)
        self.assertEqual(action_cost, 1, action_cost)
    finally:
        random.seed(None)
def update_belief(state, observations, known=False, reality_state=None):
    """Fold a set of observations into the agent's belief state.

    Each of ``observations.obstacle``, ``.harvester``, ``.food``,
    ``.defender`` and ``.enemy`` is a mapping from a cell to an add/delete
    flag; a flag of 1 adds the entity to the belief, any other value removes
    it.  Obstacles are only ever added.  A cell the harvester leaves is
    marked explored ('-').

    The dictionaries of ``state`` are modified in place, and a new state
    built by ``problem.to_state`` from those dictionaries is returned.

    param: state: Current belief state.
    param: observations: Observation object; when falsy, ``state`` is
        returned untouched.
    param: known: When true, the future food of ``reality_state`` is copied
        into the new belief, so ``reality_state`` must be provided.
    param: reality_state: State whose ``future_food`` is used when ``known``.
    return: The updated belief state.
    """
    if not observations:
        return state

    if observations.obstacle:
        # items() works on both Python 2 and 3, unlike iteritems().
        for obstacle, add_delete in observations.obstacle.items():
            if add_delete == 1:
                state.obstacle_dict[obstacle] = '#'

    if observations.harvester:
        for harvester, add_delete in observations.harvester.items():
            if add_delete == 1:
                state.harvester_dict[harvester] = 'H'
            else:
                # Tolerant removal, consistent with the food handling: an
                # observation may report the departure of an entity the
                # belief never contained.
                state.harvester_dict.pop(harvester, None)
                # NOTE(review): the original nesting was lost; the cell is
                # assumed to be marked explored only on removal - confirm.
                state.explored_dict[harvester] = '-'

    _apply_add_delete(state.food_dict, observations.food, 'F')
    _apply_add_delete(state.defender_dict, observations.defender, 'D')
    _apply_add_delete(state.enemy_dict, observations.enemy, 'E')

    new_reward = observations.reward

    # Exploration knowledge is discarded the moment food is picked up.
    if not state.has_food and observations.has_food:
        state.explored_dict.clear()

    future_food = None
    if known:
        future_food = reality_state.future_food

    return problem.to_state(state.base_dict,
                            state.harvester_dict,
                            food=state.food_dict,
                            obstacle=state.obstacle_dict,
                            defender=state.defender_dict,
                            enemy=state.enemy_dict,
                            explored=state.explored_dict,
                            has_food=observations.has_food,
                            reward=new_reward,
                            future_food=future_food,
                            step_reward=observations.step_reward)


def _apply_add_delete(target, changes, symbol):
    """Apply a cell -> add/delete mapping to ``target``; a flag of 1 adds ``symbol``."""
    if not changes:
        return
    for cell, add_delete in changes.items():
        if add_delete == 1:
            target[cell] = symbol
        else:
            target.pop(cell, None)
def testReturnMaxG(self):
    """Check the value returned by ``search`` on a 2x2 world with horizon 10.

    The grid '-#\\n$B' is expected to yield 49.0.
    """
    state_str = '-#\n$B'
    grid = problem.parse(state_str)
    harvester_world = problem.to_problem(x=2, y=2)
    distances = problem.distance_to_base(grid, harvester_world)
    distances = problem.add_distance_to_food(grid, distances, harvester_world)
    initial_state = problem.to_state(grid, distances=distances)
    max_g = search(initial_state, harvester_world, horizon=10)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(max_g, 49.0, max_g)
def testUpdateBelief(self):
    """Check that ``update_belief`` merges an observation into a belief.

    The test expects the new belief to keep the believed food at (0, 0), to
    show the base at (1, 1) as 'B' and the harvester at (0, 1), and expects
    the original belief's grid to be left unchanged.
    """
    belief_str = 'F-\n-b'
    grid = problem.parse(belief_str)
    belief = problem.to_state(grid)
    observation_str = '--\nHB'
    observation_dict = problem.parse(observation_str)
    observation = problem.to_observation(observation_dict)
    new_belief = update_belief(belief, observation)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(new_belief.grid,
                     {(0, 0): 'F', (1, 1): 'B', (0, 1): 'H'},
                     new_belief.grid)
    # The belief passed in must not have been modified.
    self.assertEqual(belief.grid, {(0, 0): 'F', (1, 1): 'b'}, belief.grid)
def testReturnPlan(self):
    """Smoke test: ``search`` with ``return_plan=True`` must run to completion.

    Nothing is asserted about the returned value; the test only fails if the
    call raises.
    """
    layout = problem.parse('-#\n$B')
    world = problem.to_problem(x=2, y=2)
    to_base = problem.distance_to_base(layout, world)
    to_base_and_food = problem.add_distance_to_food(layout, to_base, world)
    start = problem.to_state(layout, distances=to_base_and_food)
    search(start, world, horizon=10, return_plan=True)
def testExpand(self):
    """Check one ``expand`` step from the base cell (1, 1) of a 2x2 world.

    After expanding ((1, 1), 0), the open list is expected to hold the two
    neighbouring cells at cost 1, and the policy table is expected to point
    both of them back to (1, 1).
    """
    state_str = 'H-\n-B'
    base, harvester, food, obstacle, defender, enemy, has_food = problem.parse(
        state_str)
    state = problem.to_state(base, harvester, food, obstacle, defender, enemy,
                             has_food)
    world = problem.to_problem(x=2, y=2)
    open_list = []
    # One independent row per x coordinate, each holding world.y empty slots.
    policy = [[None] * world.y for _ in range(world.x)]
    expand(((1, 1), 0), open_list, policy, state, world)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(open_list, [((1, 0), 1), ((0, 1), 1)], open_list)
    self.assertEqual(policy,
                     [[None, ((1, 1), 1)], [((1, 1), 1), None]],
                     policy)
def testDijkstra(self):
    """Check the policy ``dijkstra`` builds towards (1, 1) on a 2x2 world.

    With an obstacle at (0, 0), the expected policy sends both free
    neighbours to (1, 1) at distance 1 and marks the goal itself as
    ('*', 0); the obstacle cell has no entry.
    """
    state_str = '#-\n-b'
    base, harvester, food, obstacle, defender, enemy, has_food = problem.parse(
        state_str)
    state = problem.to_state(base, harvester, food, obstacle, defender, enemy,
                             has_food)
    world = problem.to_problem(x=2, y=2)
    policy = dijkstra((1, 1), state, world)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(policy, {
        (0, 1): ((1, 1), 1),
        (1, 0): ((1, 1), 1),
        (1, 1): ('*', 0)
    }, policy)
def testTransitionHB(self):
    """Check the 'HB' transition for a food-carrying harvester next to the base.

    The harvester ('$') at (3, 0) moves onto the base at (3, 1).  The test
    expects the resulting grid to show '*' at (3, 1) and nothing at (3, 0),
    a reward of 49 and an action cost of 1.
    """
    state_str = '---$\n---B'
    grid = problem.parse(state_str)
    action = 'HB'
    harvester_world = problem.to_problem(x=4, y=2)
    distances = problem.distance_to_base(grid, harvester_world)
    initial_state = problem.to_state(grid, distances=distances)
    next_state, action_cost = transition(initial_state, action, harvester_world, time_left=1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(next_state.grid, {
        (3, 1): '*',
        (3, 0): None
    }, next_state.grid)
    self.assertEqual(next_state.reward, 49, next_state.reward)
    self.assertEqual(action_cost, 1, action_cost)
def testTransitionHF(self):
    """Check the 'HF_3_1' transition on a world built with default settings.

    The harvester at (2, 1) moves onto the food at (3, 1).  The test expects
    the resulting grid to show the harvester carrying food ('$') at (3, 1)
    and nothing at (2, 1), a reward of -1 and an action cost of 1.
    """
    state_str = 'B---\n--HF'
    grid = problem.parse(state_str)
    action = 'HF_3_1'
    harvester_world = problem.to_problem(x=4, y=2)
    distances = problem.distance_to_base(grid, harvester_world)
    distances = problem.add_distance_to_food(grid, distances, harvester_world)
    initial_state = problem.to_state(grid, distances=distances)
    next_state, action_cost = transition(initial_state, action, harvester_world, time_left=1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(next_state.grid, {
        (0, 0): 'B',
        (3, 1): '$',
        (2, 1): None
    }, next_state.grid)
    self.assertEqual(next_state.reward, -1, next_state.reward)
    self.assertEqual(action_cost, 1, action_cost)
def init_reality(reality_file_name):
    """Constructs the initial state of the world from a file.

    param: reality_file_name: The path and name of a file illustrating the
        real world. See problem for format.
    return: A tuple ``(state, x, y)``: the initial state of the world and the
        dimensions of the world.  ``x`` is the length of the last line of the
        file without its line terminator, ``y`` is the number of lines.
    """
    with open(reality_file_name, 'r') as reality_file:
        lines = reality_file.readlines()
    reality_str = ''.join(lines)
    # Only strip a newline that is actually there: the last line of a file
    # need not end with one, and blindly subtracting 1 would then report a
    # world one column too narrow.
    x = len(lines[-1].rstrip('\n')) if lines else 0
    y = len(lines)
    base, harvester, food, obstacle, defender, enemy, has_food, explored = \
        problem.parse(reality_str)
    return problem.to_state(base, harvester, food=food, obstacle=obstacle,
                            defender=defender, enemy=enemy, has_food=has_food,
                            explored=explored), x, y
def init_belief(belief_file_name, future_food=None):
    """Constructs the agent's initial belief about the world from a file.

    param: belief_file_name: The path and name of a file illustrating the
        agent's belief. See problem for format.
    param: future_food: Optional future food passed through to the state.
    return: A tuple ``(state, x, y)``: the agent's initial belief state and
        the dimensions read from the file.  ``x`` is the length of the last
        line without its line terminator, ``y`` is the number of lines.
    """
    with open(belief_file_name, 'r') as belief:
        lines = belief.readlines()
    belief_str = ''.join(lines)
    # Only strip a newline that is actually there: the last line of a file
    # need not end with one, and blindly subtracting 1 would then report a
    # world one column too narrow.
    x = len(lines[-1].rstrip('\n')) if lines else 0
    y = len(lines)
    base, harvester, food, obstacle, defender, enemy, has_food, explored = \
        problem.parse(belief_str)
    return problem.to_state(base, harvester, food=food, obstacle=obstacle,
                            defender=defender, enemy=enemy, has_food=has_food,
                            future_food=future_food, explored=explored), x, y
def transition(state, destination, world, time_left=1, horizon=1):
    """Simulate the harvester heading for ``destination`` and return the outcome(s).

    The harvester is the first key of ``state.harvester_dict``.
    ``state.distances`` is iterated as (goal, policy) pairs, and the policy
    whose goal equals ``destination`` is indexed by cell to obtain a
    (next cell, distance) pair.  If the goal is further away than
    ``time_left``, the harvester only walks ``time_left`` steps along that
    policy.  When the state contains an enemy, harvester and enemy are
    stepped alternately; if they meet, the move is cut short and a second
    outcome in which a defender is deployed is produced as well.

    Reward bookkeeping visible in the code: delivering food to a base adds
    ``50 * 0.95 ** (horizon - time_left + distance)``; every outcome pays
    ``distance``; the defender outcome additionally pays the Manhattan
    distance between the destination and the defender, plus 10.

    Uses ``dict.iteritems()`` and ``.next()``, i.e. Python 2 only.

    NOTE(review): the original indentation of this function was lost; the
    nesting below is reconstructed from the statement order and should be
    checked against the upstream source, especially the spots marked with
    further NOTE(review) comments.

    param: state: Current state (entity dictionaries, has_food, reward,
        future_food, distances).
    param: destination: Goal to head for; must match a goal in
        ``state.distances``.
    param: world: Problem description; ``x``, ``y`` and ``max_food`` are read.
    param: time_left: Maximum number of steps the harvester may take.
    param: horizon: Used only in the discount exponent of the delivery reward.
    return: A list of ``(state, distance)`` tuples: always the plain outcome,
        followed by the defender outcome when an interception happened.
    """
    #print(destination, time_left)
    #print(problem.state_to_string(state, world))
    harvester, _ = state.harvester_dict.iteritems().next()
    # Work on shallow copies so the dictionaries of the input state are kept.
    new_harvester_dict = copy.copy(state.harvester_dict)
    new_food_dict = copy.copy(state.food_dict)
    new_explored_dict = copy.copy(state.explored_dict)
    new_has_food = state.has_food
    new_reward = state.reward
    new_defender_dict = copy.copy(state.defender_dict)
    new_enemy_dict = copy.copy(state.enemy_dict)
    remaining_food = copy.copy(state.future_food)

    # Look up the precomputed policy leading to the requested destination.
    destination_policy = None
    for goal, policy in state.distances:
        if goal == destination:
            destination_policy = policy
            break

    # 1000 presumably stands for "no known path" - TODO confirm.
    # Raises TypeError if no policy matched (destination_policy is None).
    distance = 1000
    if harvester in destination_policy:
        _, distance = destination_policy[harvester]

    # Goal reachable but too far: walk only time_left steps along the policy.
    if distance < 1000 and distance > time_left:
        next_step, _ = destination_policy[harvester]
        step_count = 1
        distance = step_count
        destination = next_step
        while step_count < time_left:
            next_step, _ = destination_policy[next_step]
            step_count += 1
            destination = next_step
            distance = step_count

    deploy_defender = False
    # Raises StopIteration when the state holds no defender.
    defender, _ = state.defender_dict.iteritems().next()

    if len(state.enemy_dict) > 0:
        # Replay the walk step by step, alternating harvester and enemy
        # moves; the harvester's first step is taken up front.
        turn = True
        next_step, _ = destination_policy[harvester]
        step_count = 1
        turn = not turn
        enemy, _ = state.enemy_dict.iteritems().next()
        #enemy_policy = None
        #for goal, policy in state.distances:
        #    if goal == next_step:
        #        enemy_policy = policy
        #        break
        enemy_step = enemy
        #enemy_step, _ = enemy_policy[enemy_step]
        new_enemy = enemy_step  # TODO: I might not need to store the previous step any more
        # NOTE(review): enemy_step is never reassigned inside this loop, so
        # the comparison and the policy lookup always use the enemy's
        # starting cell - confirm this is intended.
        while step_count < distance or not turn:
            if enemy_step == next_step:
                # Harvester and enemy meet: stop here and offer a defender.
                destination = next_step
                distance = step_count
                new_enemy = enemy_step
                deploy_defender = True
                break
            else:
                if turn:
                    next_step, _ = destination_policy[next_step]
                    step_count += 1
                    turn = not turn
                else:
                    #enemy_policy = None
                    #for goal, policy in state.distances:
                    #    if goal == next_step:
                    #        enemy_policy = policy
                    #        break
                    enemy_policy = dijkstra.dijkstra(next_step, state, world,
                                                     enemy=True)
                    new_enemy = enemy_step
                    if enemy_step in enemy_policy:
                        try_enemy_step, _ = enemy_policy[enemy_step]
                        # The enemy does not step onto a defender's cell.
                        if try_enemy_step not in state.defender_dict:
                            new_enemy = try_enemy_step
                    turn = not turn
        del new_enemy_dict[enemy]
        new_enemy_dict[new_enemy] = 'E'

    del new_harvester_dict[harvester]
    new_harvester_dict[destination] = 'H'
    #new_explored_dict[harvester] = '-'

    if state.has_food and destination in state.base_dict:
        new_reward += 50 * pow(0.95, horizon - time_left + distance)
        new_has_food = False

    if not state.has_food and destination in state.food_dict:
        del new_food_dict[destination]
        new_has_food = True
        new_explored_dict = {}
        # NOTE(review): the replenishing loop is assumed to belong to the
        # pick-up branch; it may originally have been at function level.
        while len(new_food_dict) < world.max_food:
            while True:
                # remaining_food is consumed from the end and each value is
                # re-inserted at the front, so the sequence is cycled rather
                # than used up.  Values outside the world are skipped.
                try_x = remaining_food.pop()
                remaining_food.insert(0, try_x)
                while try_x >= world.x:
                    try_x = remaining_food.pop()
                    remaining_food.insert(0, try_x)
                try_y = remaining_food.pop()
                remaining_food.insert(0, try_y)
                while try_y >= world.y:
                    try_y = remaining_food.pop()
                    remaining_food.insert(0, try_y)
                try_coordinate = (try_x, try_y)
                if try_coordinate not in state.base_dict \
                        and try_coordinate not in state.obstacle_dict \
                        and try_coordinate not in new_harvester_dict \
                        and try_coordinate not in new_explored_dict:
                    new_food_dict[try_coordinate] = 'F'
                    break

    new_reward -= distance
    alt_reward = new_reward
    if deploy_defender:
        # Moving the defender costs its Manhattan distance to the destination.
        destination_x, destination_y = destination
        defender_x, defender_y = defender
        alt_reward -= 1 * (abs(destination_x - defender_x) + abs(destination_y - defender_y))

    # Outcome without defender deployment (defenders stay where they were).
    next_state = problem.to_state(state.base_dict, new_harvester_dict, food=new_food_dict,
                                  obstacle=state.obstacle_dict, defender=state.defender_dict,
                                  explored=new_explored_dict, enemy=new_enemy_dict,
                                  has_food=new_has_food, reward=new_reward,
                                  future_food=remaining_food, distances=state.distances)

    alt_enemy_dict = copy.copy(new_enemy_dict)
    if deploy_defender:
        new_defender_dict[destination] = 'D'
        if len(state.defender_dict) > 0:
            defender, _ = state.defender_dict.iteritems().next()
            del new_defender_dict[defender]
        # NOTE(review): nesting of this penalty relative to the check above
        # is a guess; the defender lookup near the top of the function fails
        # on an empty defender_dict, so both readings behave the same.
        alt_reward -= 10
        if new_enemy in new_defender_dict:
            # The enemy shares a cell with the defender: push it to the first
            # in-bounds, obstacle-free neighbour (+x, -x, +y, -y in order).
            new_enemy_x, new_enemy_y = new_enemy
            if new_enemy_x + 1 < world.x and (
                    new_enemy_x + 1, new_enemy_y) not in state.obstacle_dict:
                del alt_enemy_dict[new_enemy]
                #observation_enemy_dict[enemy] = -1
                alt_enemy_dict[(new_enemy_x + 1, new_enemy_y)] = 'E'
                #observation_enemy_dict[(new_enemy_x + 1, new_enemy_y)] = 1
            elif new_enemy_x - 1 >= 0 and (
                    new_enemy_x - 1, new_enemy_y) not in state.obstacle_dict:
                del alt_enemy_dict[new_enemy]
                #observation_enemy_dict[enemy] = -1
                alt_enemy_dict[(new_enemy_x - 1, new_enemy_y)] = 'E'
                #observation_enemy_dict[(new_enemy_x - 1, new_enemy_y)] = 1
            elif new_enemy_y + 1 < world.y and (new_enemy_x, new_enemy_y + 1) not in state.obstacle_dict:
                del alt_enemy_dict[new_enemy]
                #observation_enemy_dict[enemy] = -1
                alt_enemy_dict[(new_enemy_x, new_enemy_y + 1)] = 'E'
                #observation_enemy_dict[(new_enemy_x, new_enemy_y + 1)] = 1
            elif new_enemy_y - 1 >= 0 and (new_enemy_x, new_enemy_y - 1) not in state.obstacle_dict:
                del alt_enemy_dict[new_enemy]
                #observation_enemy_dict[enemy] = -1
                alt_enemy_dict[(new_enemy_x, new_enemy_y - 1)] = 'E'
                #observation_enemy_dict[(new_enemy_x, new_enemy_y - 1)] = 1

    # Outcome with the defender deployed; only reported when deploy_defender.
    # NOTE(review): assumed to be built unconditionally, like alt_enemy_dict.
    alt_state = problem.to_state(state.base_dict, new_harvester_dict, food=new_food_dict,
                                 obstacle=state.obstacle_dict, defender=new_defender_dict,
                                 explored=new_explored_dict, enemy=alt_enemy_dict,
                                 has_food=new_has_food, reward=alt_reward,
                                 future_food=remaining_food, distances=state.distances)

    result = [(next_state, distance)]
    if deploy_defender:
        result.append((alt_state, distance))
    return result
obstacle_dict[(x, y)] = '#' if int(args.scenario) == 3: belief_obstacle_dict[(x, y)] = '#' #i += 1 i = 0 while i < int(args.enemy): x, y = random_coordinate(int(args.width), int(args.height)) if (x, y) not in base_dict \ and (x, y) not in food_dict\ and (x, y not in obstacle_dict): enemy_dict[(x, y)] = 'E' i += 1 belief = problem.to_state(base_dict, harvester_dict, food=belief_food_dict, obstacle=belief_obstacle_dict) reality = problem.to_state(base_dict, harvester_dict, food=food_dict, obstacle=obstacle_dict, enemy=enemy_dict) with open("../test/{0}_{1}_real.world".format(args.file_name, w), "w") as world_file: world_file.write(problem.state_to_string(reality, dimensions)) with open("../test/{0}_{1}_belief.world".format(args.file_name, w), "w") as world_file: world_file.write(problem.state_to_string(belief, dimensions))
harvester_world = problem.to_problem(x, y, int(args.max_food), int(args.known), enemy=problem_has_enemy, scenario=int(args.scenario)) # food_dist = problem.chance_of_food(reality_state, harvester_world) future_food = problem.sample_n_future_food(harvester_world, 100) # for i in range(1000): # future_food.append(problem.sample_cell(food_dist)[1]) distances = problem.all_distances(reality_state, harvester_world) reality_state = problem.to_state(reality_state.base_dict, reality_state.harvester_dict, food=reality_state.food_dict, obstacle=reality_state.obstacle_dict, defender=reality_state.defender_dict, enemy=reality_state.enemy_dict, has_food=reality_state.has_food, future_food=future_food, distances=distances) if harvester_world.known: belief_state, _, _ = init_belief(args.belief, future_food=future_food) else: belief_state, _, _ = init_belief(args.belief) time_step = 0 print_args(args) print_step(time_step, reality_state, belief_state, harvester_world) while time_step < int(args.time):