def check_ILASP_cover(hypothesis, pos, height, width, link):
    """Check whether the hypothesis needs to be refined for ``pos``.

    A scratch ILASP task is written to ``cf.CHECK_LAS`` inside
    ``cf.BASE_DIR``: the optional ``link`` text, the current ``hypothesis``
    and the example ``pos`` are appended, the LAS base is copied in via
    ``copy_las_base`` and ``remove_mode`` is applied to the file.  ILASP is
    then run on that file.

    Args:
        hypothesis: Current hypothesis text, appended verbatim to the task.
        pos: Example text to check, or ``None`` when there is nothing to
            check.
        height: Forwarded to ``copy_las_base``.
        width: Forwarded to ``copy_las_base``.
        link: Optional extra text written first; skipped when falsy.

    Returns:
        ``True`` when ``pos`` is ``None`` or ILASP returns an empty string,
        ``False`` otherwise.
    """
    # Nothing to verify: return before touching the filesystem.
    if pos is None:
        return True

    helper.silentremove(cf.BASE_DIR, cf.CHECK_LAS)
    output_las = os.path.join(cf.BASE_DIR, cf.CHECK_LAS)

    if link:
        helper.append_to_file(link, output_las)
    helper.append_to_file(hypothesis, output_las)
    helper.append_to_file(pos, output_las)
    copy_las_base(height, width, output_las)
    remove_mode(output_las)

    print("checking ILASP necessity...")
    # Keep the ILASP output in its own name instead of clobbering the
    # ``hypothesis`` argument.
    ilasp_output = run_ILASP(output_las)
    return ilasp_output == ""
def test_send_background_to_clingo(self):
    """The output file written by send_background_to_clingo must read "hello"."""
    source_path = os.path.join(BASE_DIR, "input.txt")
    target_path = os.path.join(BASE_DIR, "output.txt")
    # Start from a clean slate so a stale output file cannot satisfy the check.
    helper.silentremove(BASE_DIR, "output.txt")

    abduction.send_background_to_clingo(source_path, target_path)

    with open(target_path, "r") as handle:
        contents = handle.read()
    self.assertEqual(contents, "hello")
def test_copy_las_base(self):
    """Check the exact text copy_las_base leaves in a fresh .las file."""
    helper.silentremove(BASE_DIR, "las_base1.las")
    las_path = os.path.join(BASE_DIR, "las_base1.las")

    # NOTE(review): the other visible call sites pass (height, width, path);
    # this call passes the path first - confirm against copy_las_base.
    induction.copy_las_base(las_path, 1, 1)

    with open(las_path, "r") as handle:
        content = "".join(handle.readlines())
    self.assertEqual(content, "cell((0..1, 0..1)).\n")
def test_log_las(self):
    """log_las must produce las_episode2_at_1.las with the expected lines."""
    helper.silentremove(BASE_DIR, "las_episode2_at_1.las")
    source_path = os.path.join(BASE_DIR, "input_log.txt")

    # Arguments after BASE_DIR: episode number 2, time step 1.
    helper.log_las(source_path, "h", BASE_DIR, 2, 1)

    logged_path = os.path.join(BASE_DIR, "las_episode2_at_1.las")
    with open(logged_path, "r") as handle:
        logged_lines = handle.readlines()
    self.assertEqual(logged_lines, ['%ILASP\n', '\n', '%HELLO'])
def test_log_asp(self):
    """log_asp must produce clingo_episode2_at_1.lp with the expected lines."""
    helper.silentremove(BASE_DIR, "clingo_episode2_at_1.lp")
    source_path = os.path.join(BASE_DIR, "input_log.txt")

    # Arguments after BASE_DIR: episode number 2, time step 1.
    helper.log_asp(source_path, "asp", BASE_DIR, 2, 1)

    logged_path = os.path.join(BASE_DIR, "clingo_episode2_at_1.lp")
    with open(logged_path, "r") as handle:
        logged_lines = handle.readlines()
    self.assertEqual(
        logged_lines,
        ['HELLO%ANSWER SETS\n', '\n', '%a\n', '%s\n', '%p\n'],
    )
def test_add_new_walls(self):
    """add_new_walls is truthy on the first call and falsy on an identical repeat."""
    walls = induction.get_all_walls(env)
    position = env.unwrapped.observer.get_observation()["position"]

    helper.silentremove(BASE_DIR, "background_test2.lp")
    background_path = os.path.join(BASE_DIR, "background_test2.lp")

    first_result = abduction.add_new_walls(position, walls, background_path)
    self.assertTrue(first_result)

    # Same arguments again: the test expects nothing new to be reported.
    second_result = abduction.add_new_walls(position, walls, background_path)
    self.assertFalse(second_result)
def test_send_state_transition_pos(self):
    """The .las file must receive content while the background file stays empty."""
    state_from = [1, 1]
    state_to = [2, 1]
    move = "right"
    walls = induction.get_all_walls(env)

    las_path = os.path.join(BASE_DIR, "las_test2.las")
    helper.silentremove(BASE_DIR, "las_test2.las")
    background_path = os.path.join(BASE_DIR, "background_test4.lp")
    helper.silentremove(BASE_DIR, "background_test4.lp")

    induction.send_state_transition_pos(
        state_from, state_to, move, walls, las_path, background_path)

    # Measure both files before asserting so a missing file surfaces first.
    las_bytes = os.path.getsize(las_path)
    background_bytes = os.path.getsize(background_path)
    self.assertGreater(las_bytes, 0)
    self.assertEqual(background_bytes, 0)
def test_make_lp(self):
    """make_lp must write a non-empty program to the target file."""
    helper.silentremove(BASE_DIR, "clingo_test.lp")
    helper.silentremove(BASE_DIR, "newlp.las")

    las_path = os.path.join(BASE_DIR, "las_test.las")
    background_path = os.path.join(BASE_DIR, "background_test3.lp")
    target_path = os.path.join(BASE_DIR, "newlp.las")

    abduction.make_lp(
        "H",              # hypothesis
        las_path,
        background_path,
        target_path,
        (1, 1),           # start state
        (2, 2),           # goal state
        10,               # time range
        "cell",           # cell range
    )

    written_bytes = os.path.getsize(target_path)
    self.assertGreater(written_bytes, 0)
def get_inc_exc(hypothesis, state_before, state_after, action, walls, cell_range, link=None):
    """Derive the inclusion and exclusions for one observed transition.

    A grounding program is assembled in ``cf.GROUNDING_DIR`` from the
    hypothesis, the state before the move, the action, ``cf.ADJACENT``, the
    cell range, a ``#show state_after/1.`` directive, the optional ``link``
    text and one ``wall(...)`` fact per entry of ``walls``.  The program is
    run through ``abduction.run_clingo`` and its answer sets are compared
    with the observed ``state_after``.

    Returns:
        A ``(inclusion, exclusions)`` pair of strings.  ``inclusion`` is
        ``state_after`` when it is absent from the answer sets, otherwise
        ``""``.  ``exclusions`` is the comma-separated list of answer sets
        that differ from ``state_after`` (``""`` when there are none).
    """
    helper.silentremove(cf.BASE_DIR, cf.GROUNDING)
    grounding_path = cf.GROUNDING_DIR

    program_parts = (
        hypothesis,
        state_before + "\n",
        action + "\n",
        cf.ADJACENT,
        cell_range,
        "#show state_after/1.\n",
    )
    for part in program_parts:
        helper.append_to_file(part, grounding_path)

    if link:
        helper.append_to_file(link, grounding_path)

    for cell in walls:
        helper.append_to_file("wall(" + str(cell) + ").\n", grounding_path)

    answer_sets = abduction.run_clingo(grounding_path)

    # Anything the hypothesis predicts besides the observed state is an
    # exclusion.
    exclusions = ",".join(
        predicted for predicted in answer_sets if state_after != predicted
    )

    # The observed state becomes an inclusion only when the hypothesis
    # failed to predict it.
    inclusion = state_after if state_after not in answer_sets else ""

    return inclusion, exclusions
def test_silentmove(self):
    """After silentremove, the created file's stat size must be zero."""
    cls = self.__class__
    helper.create_file(BASE_DIR, cls.filename)
    helper.append_to_file("hello", cls.created_file)

    helper.silentremove(BASE_DIR, cls.filename)

    remaining_bytes = os.stat(cls.created_file).st_size
    self.assertTrue(remaining_bytes == 0)
def k_learning(env, num_episodes, h, goal, epsilon=0.1, record_prefix=None, is_link=False):
    """Run the plan / act / refine learning loop for ``num_episodes`` episodes.

    Each episode repeatedly (1) asks Clingo for a plan from the agent's
    current position, (2) executes the plan step by step with epsilon-greedy
    exploration, and (3) re-runs ILASP whenever the examples generated from
    an observed transition are not covered by the current hypothesis.

    Args:
        env: Environment; must expose ``unwrapped.game.height`` / ``width``,
            ``unwrapped.observer.get_observation()``, ``reset``, ``step`` and
            ``render``.
        num_episodes: Number of episodes to run.
        h: Initial hypothesis.
        goal: Goal state, forwarded to ``abduction.add_goal_state``.
        epsilon: Probability threshold below which a random action is taken.
        record_prefix: When truthy, Clingo and ILASP inputs/outputs are
            logged under a directory generated from ``cf.BASE_DIR/log`` and
            this prefix.
        is_link: Forwarded to ``induction.copy_las_base``.

    Returns:
        A ``(stats, stats_test, stats_ilasp)`` tuple: per-episode training
        statistics, the statistics object handed to ``run_experiment``, and
        the ILASP runtimes indexed by ``[episode, time step]``.
    """
    # Get cell range for the game
    height = env.unwrapped.game.height
    width = env.unwrapped.game.width
    cell_range = "\ncell((0..{}, 0..{})).\n".format(width - 1, height - 1)

    # Log everything and keep the record here
    log_dir = None
    if record_prefix:
        log_dir = os.path.join(cf.BASE_DIR, "log")
        log_dir = helper.gen_log_dir(log_dir, record_prefix)

    # The first abduction needs lots of basic information
    first_abduction = False
    keep_link = None

    # Clean up all the files first
    helper.silentremove(cf.BASE_DIR, cf.GROUNDING)
    helper.silentremove(cf.BASE_DIR, cf.LASFILE)
    helper.silentremove(cf.BASE_DIR, cf.CLINGOFILE)
    helper.silentremove(cf.BASE_DIR, cf.LAS_CACHE, cf.LAS_CACHE_PATH)
    helper.create_file(cf.BASE_DIR, cf.LAS_CACHE, cf.LAS_CACHE_PATH)
    cf.ALREADY_LINK = False

    # Copy the positive examples that were used in TL before
    tl_file = os.path.join(cf.BASE_DIR, "tl_pos.las")
    helper.copy_file(tl_file, cf.LASFILE)

    # Add mode bias and adjacent definition for ILASP
    induction.copy_las_base(height, width, cf.LASFILE, is_link)

    # Record the current hypothesis
    hypothesis = h

    abduction.make_lp_base(cell_range)
    wall_list = induction.get_all_walls(env)

    stats = plotting.EpisodeStats(episode_lengths=np.zeros(num_episodes),
                                  episode_rewards=np.zeros(num_episodes),
                                  episode_runtime=np.zeros(num_episodes))
    stats_ilasp = plotting.TimeStats(
        ILASP_runtime=np.zeros((num_episodes, cf.TIME_RANGE)))
    stats_test = plotting.EpisodeStats(episode_lengths=np.zeros(num_episodes),
                                       episode_rewards=np.zeros(num_episodes),
                                       episode_runtime=np.zeros(num_episodes))

    for i_episode in range(num_episodes):
        print("==============NEW EPISODE======================")
        print("i_episode ", i_episode)
        start_total_runtime = time.time()

        previous_state = env.reset()
        agent_position = env.unwrapped.observer.get_observation()["position"]
        env.render()
        previous_state_at = py_asp.state_at(previous_state[0], previous_state[1], 0)
        t = 0
        # Reset per episode: ``done`` is read after the plan-execution loop
        # even when the plan is empty, so without this it would be unbound
        # on the first episode or stale from the previous one.
        done = False

        # Once the agent reaches the goal, the algorithm kicks in
        # Decaying epsilon-greedy is currently disabled; the alternative is
        # epsilon * (1 / (i_episode + 1) ** cf.DECAY_PARAM).
        new_epsilon = epsilon
        print("new_epsilon ", new_epsilon)

        while t < cf.TIME_RANGE:
            if not first_abduction:
                # Convert syntax of H for ASP solver
                hypothesis_asp = py_asp.convert_las_asp(hypothesis)
                abduction.add_hypothesis(hypothesis_asp)
                abduction.add_start_state(agent_position)
                abduction.add_goal_state(goal)
                first_abduction = True

            # Update the starting position for Clingo
            agent_position = env.unwrapped.observer.get_observation()["position"]
            abduction.update_agent_position(agent_position, t)
            abduction.update_time_range(agent_position, t)

            # Run clingo to get a plan
            answer_sets = abduction.run_clingo(cf.CLINGOFILE)
            states_plan, actions_array = abduction.sort_planning(answer_sets)

            # Record clingo
            if record_prefix:
                inputfile = os.path.join(cf.BASE_DIR, cf.CLINGOFILE)
                helper.log_asp(inputfile, answer_sets, log_dir, i_episode, t)

            # Execute the planning
            for action_index, action in enumerate(actions_array):
                print("---------Planning phase---------------------")
                # Flip a coin. If threshold < epsilon, explore randomly
                threshold = random.uniform(0, 1)
                if threshold < new_epsilon:
                    action_int = randint(0, 3)
                    if cf.IS_PRINT:
                        print("Taking a pure random action...",
                              helper.convert_action(action_int))
                else:
                    # Following the plan
                    action_int = helper.get_action(action[1])
                    if cf.IS_PRINT:
                        print("Following the plan...",
                              helper.convert_action(action_int))

                action_string = helper.convert_action(action_int)
                next_state, reward, done, _ = env.step(action_int)
                next_state_at = py_asp.state_at(next_state[0], next_state[1], t + 1)

                # Reward shaping: bonus on termination, step cost otherwise.
                if done:
                    reward = reward + 10
                else:
                    reward = reward - 1

                # Meanwhile, accumulate all background knowledge
                abduction.add_new_walls(previous_state, wall_list, cf.CLINGOFILE)

                # Make ASP syntax of state transition
                pos1, pos2, link = induction.generate_pos(
                    hypothesis, previous_state, next_state, action_string,
                    wall_list, cell_range)
                if link is not None:
                    keep_link = link

                # Update H if necessary
                if (not induction.check_ILASP_cover(
                        hypothesis, pos1, height, width, keep_link)) or (
                        not induction.check_ILASP_cover(
                            hypothesis, pos2, height, width, keep_link)):
                    start_time = time.time()
                    hypothesis = induction.run_ILASP(cf.LASFILE, cf.CACHE_DIR)
                    ilasp_runtime = (time.time() - start_time)
                    stats_ilasp.ILASP_runtime[i_episode, t] += ilasp_runtime

                    # Convert syntax of H for ASP solver
                    hypothesis_asp = py_asp.convert_las_asp(hypothesis)
                    abduction.update_h(hypothesis_asp)

                    # NOTE(review): nesting reconstructed from flattened
                    # source - assumed to log only when H was refreshed.
                    if record_prefix:
                        inputfile = os.path.join(cf.BASE_DIR, cf.LASFILE)
                        helper.log_las(inputfile, hypothesis, log_dir, i_episode, t)

                previous_state = next_state
                previous_state_at = next_state_at

                # Update stats
                stats.episode_rewards[i_episode] += reward
                stats.episode_lengths[i_episode] = action_index

                env.render()
                t = t + 1

                # After a random action the remaining plan is abandoned and
                # the outer loop plans again.
                if done or (threshold < new_epsilon):
                    break

            # No plan was produced: still advance time so the loop terminates.
            if not actions_array:
                t = t + 1

            if done:
                break

        stats.episode_runtime[i_episode] += (time.time() - start_total_runtime)
        run_experiment(env, i_episode, stats_test, width, cf.TIME_RANGE)

    return stats, stats_test, stats_ilasp