Beispiel #1
0
class Walker:
    """Agent that looks up pre-recorded observations and drives an actor-critic pair.

    On construction it loads two pickled lookup tables (observations and
    volumes, both keyed by a position string), builds an ``Actor`` from a
    saved checkpoint, a ``Critic``, a ``BinSupervisor`` classifier living in
    its own TensorFlow graph, and an ``Astar`` path finder.
    """

    def __init__(self, n_features, n_actions):
        """Load the observation tables and build the models.

        Args:
            n_features: Forwarded to ``Actor`` and ``Critic``.
            n_actions: Forwarded to ``Actor`` and ``Critic``.
        """
        self.n_features = n_features
        self.n_actions = n_actions

        # env, simulate observations
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # trusted, locally generated .pkl files here.
        with open('env_hole.pkl', 'rb') as env_file:
            self.observe_env = pickle.load(env_file)

        with open('env_hole_vol.pkl', 'rb') as vol_file:
            self.observe_vol = pickle.load(vol_file)

        # current position
        # NOTE(review): pos_y defaults to the float 1.0 while the example keys
        # below use "1" for y - confirm what callers pass to the observe_*
        # methods.
        self.pos_x = None
        self.pos_y = 1.0
        self.pos_z = None

        # 8 action dim
        self.action_labels = [
            '0', '45', '90', '135', '180', '225', '270', '315'
        ]

        self.actor = Actor(self.n_features, self.n_actions, lr=0.004)
        self.actor.load_trained_model("save/multiple/hole/save100.ckpt")

        # fixme, first define critic before load : will report bug for not found in checkpoint
        # (i.e. keep the critic construction AFTER the actor checkpoint load.)
        self.critic = Critic(self.n_features,
                             self.n_actions,
                             lr=0.003,
                             gamma=0.95)

        # fixme, use trained model to predict
        # The binary classifier is built inside its own graph, separate from
        # whatever graph the actor/critic use.
        self.bin_graph = tf.Graph()
        with self.bin_graph.as_default():
            self.bin_classfic = BinSupervisor(366, 2)

        # fixme, avoid obstacles, env defined in A star
        self.astar = Astar()

    @staticmethod
    def _position_key(x, y, z):
        """Build the lookup key used by both observation tables.

        ``x`` and ``z`` are converted to float first, ``y`` is used as given,
        e.g. ``(-2, 1, 2)`` -> ``'-2.0_1_2.0'``.
        """
        return "_".join((str(float(x)), str(y), str(float(z))))

    def reset_walker_pos(self, x, y, z):
        """Overwrite the stored current position with ``(x, y, z)``."""
        self.pos_x = x
        self.pos_y = y
        self.pos_z = z

    # '-2.0_1_2.0':[gcc_vector, label]
    def observe_gcc_vector(self, x, y, z):
        """Return the first element of the ``observe_env`` entry for a position.

        Raises:
            KeyError: If the position has no entry in ``observe_env``.
        """
        return self.observe_env[self._position_key(x, y, z)][0]

    # '-2.0_1_2.0':vol
    def observe_volume(self, x, y, z):
        """Return the ``observe_vol`` entry for a position.

        Per the original author's note the value is a 4-dim vector, one
        component per mic.

        Raises:
            KeyError: If the position has no entry in ``observe_vol``.
        """
        return self.observe_vol[self._position_key(x, y, z)]

    def choose_action(self, s, invalid_actions):
        """Ask the actor for an action; returns the ``(a, p)`` pair it outputs."""
        a, p = self.actor.output_action(s, invalid_actions)
        return a, p

    def learn(self, s, a, s_, r):
        """Run one critic update, then feed its result to the actor update."""
        td = self.critic.learn(s, r, s_)
        self.actor.learn(s, a, td)

    # fixme, call binary model to judge in room or not
    # use argmax to determine
    def sound_in_room(self, x):
        """Return True when index 0 is the argmax of the binary classifier output."""
        with self.bin_graph.as_default():
            acts = self.bin_classfic.is_in_room(x)
            # bool() keeps the return a plain Python bool, not numpy.bool_.
            return bool(np.argmax(acts) == 0)

    def find_shortest_path(self, sx, sz, dx, dz):
        """Delegate to ``Astar.find_path`` from ``(sx, sz)`` to ``(dx, dz)``."""
        return self.astar.find_path(sx, sz, dx, dz)