class Walker:
    """Agent that walks a pre-recorded environment using an actor-critic pair.

    Observations are looked up from two pickled mappings keyed by position
    strings of the form ``"<float x>_<y>_<float z>"`` (e.g. ``'-2.0_1_2.0'``).
    """

    def __init__(self, n_features, n_actions):
        """Load the recorded environment and build the models.

        Args:
            n_features: feature count passed to ``Actor`` and ``Critic``.
            n_actions: action count passed to ``Actor`` and ``Critic``.
        """
        self.n_features = n_features
        self.n_actions = n_actions

        # env, simulate observations
        self.observe_env = self._load_pickle('env_hole.pkl')
        self.observe_vol = self._load_pickle('env_hole_vol.pkl')

        # current position
        self.pos_x = None
        self.pos_y = 1.0
        self.pos_z = None

        # 8 action dim
        self.action_labels = [
            '0', '45', '90', '135', '180', '225', '270', '315',
        ]

        self.actor = Actor(self.n_features, self.n_actions, lr=0.004)
        self.actor.load_trained_model("save/multiple/hole/save100.ckpt")

        # fixme: the critic is deliberately created AFTER the actor checkpoint
        # is loaded; per the original note, defining it first reports a
        # "not found in checkpoint" error. Keep this statement order.
        self.critic = Critic(self.n_features, self.n_actions, lr=0.003, gamma=0.95)

        # fixme, use trained model to predict
        # The binary classifier is built inside its own graph.
        self.bin_graph = tf.Graph()
        with self.bin_graph.as_default():
            self.bin_classfic = BinSupervisor(366, 2)

        # fixme, avoid obstacles, env defined in A star
        self.astar = Astar()

    @staticmethod
    def _load_pickle(path):
        """Unpickle and return the object stored at *path*.

        The file is closed even when unpickling raises.
        """
        # SECURITY: pickle.load can execute arbitrary code; only use this on
        # trusted, locally produced environment files.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    @staticmethod
    def _position_key(x, y, z):
        """Build the lookup key ``"<float x>_<y>_<float z>"``.

        Only x and z are converted to float; y is stringified as given, so
        ``y=1`` yields ``"_1_"`` while ``y=1.0`` yields ``"_1.0_"``.
        """
        return str(float(x)) + "_" + str(y) + "_" + str(float(z))

    def reset_walker_pos(self, x, y, z):
        """Set the walker's current position to ``(x, y, z)``."""
        self.pos_x = x
        self.pos_y = y
        self.pos_z = z

    # '-2.0_1_2.0':[gcc_vector, label]
    def observe_gcc_vector(self, x, y, z):
        """Return the first element of the environment entry at ``(x, y, z)``.

        Per the layout note above, entries are ``[gcc_vector, label]``, so
        this is the GCC vector. A missing position propagates the lookup
        error of the underlying mapping.
        """
        return self.observe_env[self._position_key(x, y, z)][0]

    # '-2.0_1_2.0':vol
    def observe_volume(self, x, y, z):
        """Return the recorded volume entry at ``(x, y, z)``.

        The original note describes it as a 4-dim vector, one value per mic.
        A missing position propagates the lookup error of the mapping.
        """
        return self.observe_vol[self._position_key(x, y, z)]

    def choose_action(self, s, invalid_actions):
        """Ask the actor for an action given state *s*.

        Returns:
            The ``(a, p)`` pair produced by ``Actor.output_action``.
        """
        a, p = self.actor.output_action(s, invalid_actions)
        return a, p

    def learn(self, s, a, s_, r):
        """Run one update: critic learns from ``(s, r, s_)``, then the actor
        learns from ``(s, a)`` using the value the critic returned."""
        td = self.critic.learn(s, r, s_)
        self.actor.learn(s, a, td)

    # fixme, call binary model to judge in room or not
    # use argmax to determine
    def sound_in_room(self, x):
        """Return True when the binary classifier's argmax output is index 0."""
        with self.bin_graph.as_default():
            acts = self.bin_classfic.is_in_room(x)
        # bool() keeps the return a plain Python bool rather than numpy.bool_.
        return bool(np.argmax(acts) == 0)

    def find_shortest_path(self, sx, sz, dx, dz):
        """Delegate to ``Astar.find_path`` from ``(sx, sz)`` to ``(dx, dz)``."""
        return self.astar.find_path(sx, sz, dx, dz)