def action_given_state(self, state):
    """Choose an action for ``state`` by sampling from its squashed Q-values.

    The state is flattened, fed in as ``self.core_state`` when evaluating
    ``self.core_q_values`` through ``self.sess.run``, and the first row of
    the result is passed through ``u.raised`` (with
    ``self.state_normalisation_squash``) and ``u.normalised`` before
    ``u.weighted_choice`` picks the action. Every call prints a one-line
    trace of the decision.

    Args:
        state: Raw state; handed to ``flatten`` before being fed in.

    Returns:
        Whatever ``u.weighted_choice`` returns for the normalised values
        (presumably an action index -- confirm against ``u``).
    """
    state = flatten(state)
    q_values = self.sess.run(self.core_q_values,
                             feed_dict={self.core_state: state})
    # Only the first row is used; presumably the batch holds a single state.
    normed = u.normalised(
        u.raised(q_values[0], self.state_normalisation_squash))
    action = u.weighted_choice(normed)
    # One pre-formatted argument reproduces the old space-separated output
    # and parses under both Python 2 and Python 3.
    print("CHOOSE\t based on state %s q_values %s (normed to %s ) => action %s"
          % (state, q_values, normed, action))
    return action
def action_given_state(self, state):
    """Choose an action for ``state`` by sampling from its squashed Q-values.

    The state is flattened and fed in as ``self.core_state`` to evaluate
    ``self.core_q_values``; the first row of the result goes through
    ``u.raised`` (with ``self.state_normalisation_squash``) and
    ``u.normalised``, and ``u.weighted_choice`` picks the action from that.
    A debug trace is printed when ``random.random() < 0.1``.

    Args:
        state: Raw state; handed to ``flatten`` before being fed in.

    Returns:
        Whatever ``u.weighted_choice`` returns for the normalised values
        (presumably an action index -- confirm against ``u``).
    """
    state = flatten(state)
    q_values = self.sess.run(self.core_q_values,
                             feed_dict={self.core_state: state})
    # Only the first row is used; presumably the batch holds a single state.
    normed = u.normalised(
        u.raised(q_values[0], self.state_normalisation_squash))
    action = u.weighted_choice(normed)
    if random.random() < 0.1:
        # Sampled trace; the parenthesised single argument prints the same
        # text under Python 2 and Python 3.
        print(">action_given_state state %s q_values %s normed %s action %s"
              % (state, q_values, normed, action))
    return action
def action_given_state(self, state):
    """Choose an action for ``state`` by sampling from its squashed Q-values.

    The state is flattened and fed in as ``self.core_state`` to evaluate
    ``self.core_q_values``; the first row of the result goes through
    ``u.raised`` (with ``self.state_normalisation_squash``) and
    ``u.normalised``, and ``u.weighted_choice`` picks the action from that.
    When ``random.random() <= 0.05`` a debug trace is printed with the
    values formatted to two decimal places.

    Args:
        state: Raw state; handed to ``flatten`` before being fed in.

    Returns:
        Whatever ``u.weighted_choice`` returns for the normalised values
        (presumably an action index -- confirm against ``u``).
    """
    state = flatten(state)
    q_values = self.sess.run(self.core_q_values,
                             feed_dict={self.core_state: state})
    # Only the first row is used; presumably the batch holds a single state.
    normed = u.normalised(
        u.raised(q_values[0], self.state_normalisation_squash))
    action = u.weighted_choice(normed)
    if random.random() <= 0.05:
        # "%0.2f" % v already yields strings, so they can be joined directly.
        q_values_str = " ".join("%0.2f" % v for v in q_values[0])
        normed_str = " ".join("%0.2f" % v for v in normed)
        # Parenthesised single argument: same output on Python 2 and 3.
        print(">action_given_state state %s q_values %s normed %s action %s"
              % (state, q_values_str, normed_str, action))
    return action
def action_given_state(self, state):
    """Sample an action for ``state`` from its squashed, normalised Q-values.

    Steps: flatten the state, evaluate ``self.core_q_values`` with the
    flattened state fed in as ``self.core_state``, apply ``u.raised`` (using
    ``self.state_normalisation_squash``) and ``u.normalised`` to the first
    row of the result, then let ``u.weighted_choice`` pick. A debug trace
    with two-decimal formatting is printed when
    ``random.random() <= 0.05``.

    Args:
        state: Raw state; handed to ``flatten`` before being fed in.

    Returns:
        Whatever ``u.weighted_choice`` returns for the normalised values
        (presumably an action index -- confirm against ``u``).
    """

    def _two_dp(values):
        # Space-separated rendering with two decimal places per value.
        return " ".join("%0.2f" % v for v in values)

    state = flatten(state)
    q_values = self.sess.run(self.core_q_values,
                             feed_dict={self.core_state: state})
    # Only the first row is used; presumably the batch holds a single state.
    first_row = q_values[0]
    normed = u.normalised(
        u.raised(first_row, self.state_normalisation_squash))
    action = u.weighted_choice(normed)
    if random.random() <= 0.05:
        # Parenthesised single argument: same output on Python 2 and 3.
        print(">action_given_state state %s q_values %s normed %s action %s"
              % (state, _two_dp(first_row), _two_dp(normed), action))
    return action
def q_values_normalised_for_pick(self, state):
    """Return the Q-table entry for ``state`` after squashing and normalising.

    Looks up ``self.q_table[state]``, passes it through ``u.raised`` with
    ``self.state_normalisation_squash``, and returns the result of
    ``u.normalised`` on that.
    """
    squashed = u.raised(self.q_table[state], self.state_normalisation_squash)
    return u.normalised(squashed)
def q_values_normalised_for_pick(self, state):
    """Squash and normalise the stored Q-values for ``state``.

    The entry ``self.q_table[state]`` is fed to ``u.raised`` together with
    ``self.state_normalisation_squash``; the outcome is handed to
    ``u.normalised`` and returned as-is.
    """
    stored_values = self.q_table[state]
    squash = self.state_normalisation_squash
    return u.normalised(u.raised(stored_values, squash))