def score_documents_tf(user_obs,
                       doc_obs,
                       no_click_mass=1.0,
                       is_mnl=False,
                       min_normalizer=-1.0):
    """Computes scores for all documents and for the no-click option.

    This implements both the multinomial proportional model and the
    multinomial logit model. Document scores are the inner products of
    `user_obs` with the rows of `doc_obs`.

    Args:
      user_obs: Tensor-like user observation. It is reshaped to a single row
        (shape `[1, -1]`) before being multiplied with `doc_obs`.
      doc_obs: Tensor-like value (e.g. a numpy array) that represents the
        observation of all documents in the candidate set. Products are
        summed over axis 1, so presumably there is one document per row.
      no_click_mass: A float indicating the mass given to the no-click option.
      is_mnl: Whether to use a multinomial logit model (softmax over all
        scores, including the no-click entry) instead of a multinomial
        proportional model. Its Python truth value selects the branch.
      min_normalizer: A float (<= 0) subtracted from all scores to offset them
        to be positive when using the multinomial proportional model.

    Returns:
      A tuple `(doc_scores, no_click_score)`: a float tensor with one score per
      document and a float tensor with the score for picking no document.
      With `is_mnl` the values are softmax-normalized; otherwise they are the
      raw scores offset by `-min_normalizer`.
    """
    user_obs = tf.reshape(user_obs, [1, -1])
    scores = tf.reduce_sum(input_tensor=tf.multiply(user_obs, doc_obs), axis=1)
    # tf.concat needs every input to share one dtype. A bare tf.constant of a
    # Python float is float32, which breaks when the observations are float64
    # (numpy's default), so build the no-click entry in the scores' dtype.
    no_click = tf.constant([no_click_mass], dtype=scores.dtype)
    all_scores = tf.concat([scores, no_click], axis=0)
    if is_mnl:
        all_scores = tf.nn.softmax(all_scores)
    else:
        all_scores = all_scores - min_normalizer
    # The no-click entry was appended last, so split it back off the end.
    return all_scores[:-1], all_scores[-1]
def projection_dist(states):
    """Scores `states` by their projection onto the start-to-goal direction.

    `starting_states` and `goals` are not parameters; they are resolved from
    the surrounding scope.

    Args:
      states: Tensor of states; it must be compatible with `starting_states`
        for element-wise subtraction.

    Returns:
      The dot product of `states - starting_states` with
      `goals - starting_states` (summed over the last axis), divided by the
      2-norm of `goals - starting_states`, minus the 2-norm of
      `states - starting_states`.
    """
    goal_offset = goals - starting_states
    state_offset = states - starting_states
    dot = tf.reduce_sum(tf.multiply(state_offset, goal_offset), -1)
    # NOTE(review): both tf.norm calls omit `axis`, so each yields a single
    # norm over the whole tensor while `dot` is reduced over the last axis
    # only -- confirm this is intended if the inputs are batched.
    projection = tf.math.divide(dot, tf.norm(goal_offset, ord=2))
    travelled = tf.norm(state_offset, 2)
    return -1 * travelled + projection
def normalized_dist(states):
    """Computes a signed, projection-based distance measure for `states`.

    `starting_states`, `goals`, `alpha` and `epsilon` are not parameters; they
    are resolved from the surrounding scope.

    With `p` the dot product of `states - starting_states` and
    `goals - starting_states` (summed over the last axis), `g` the 2-norm of
    `goals - starting_states` and `d` the 2-norm of `states - starting_states`,
    the result is::

        sqrt(epsilon + | sign(p) * (p / g)**2 - alpha * (d**2 - (p / g)**2) |)

    Args:
      states: Tensor of states; it must be compatible with `starting_states`
        for element-wise subtraction.

    Returns:
      A tensor holding the value of the expression above.
    """
    goal_offset = goals - starting_states
    state_offset = states - starting_states
    dot = tf.reduce_sum(tf.multiply(state_offset, goal_offset), -1)
    # NOTE(review): both tf.norm calls omit `axis`, so each yields a single
    # norm over the whole tensor while `dot` is reduced over the last axis
    # only -- confirm this is intended if the inputs are batched.
    # Squared projection length; used both for the signed term and for the
    # residual (d**2 - projection**2), so compute it once.
    projection_sq = tf.square(tf.math.divide(dot, tf.norm(goal_offset, ord=2)))
    signed_projection_sq = tf.sign(dot) * projection_sq
    dist_sq = tf.square(tf.norm(state_offset, 2))
    residual_sq = dist_sq - projection_sq
    return tf.sqrt(epsilon + tf.abs(signed_projection_sq - alpha * residual_sq))
def kaf(linear, name, kernel='rbf', D=None, gamma=None):
    """Applies a kernel-based activation to `linear` with trainable weights.

    Kernel values between `linear` and the dictionary `D` are computed by
    `gauss_kernel` (or `gauss_kernel2D` on a meshgrid of `D` with itself),
    multiplied element-wise by a trainable variable and summed over the last
    axis. The variable is created (or reused) under the `'kaf'` variable scope.

    Args:
      linear: Input tensor. Its last static dimension sizes the variable
        (halved with integer division for the `'rbf2d'` kernel).
      name: Name of the trainable mixing-coefficient variable.
      kernel: Either `'rbf'` or `'rbf2d'`.
      D: Dictionary tensor; defaults to 20 evenly spaced points on [-2, 2].
      gamma: Forwarded unchanged to the kernel helper.

    Returns:
      The kernel values weighted by the variable and summed over the last axis.

    Raises:
      NotImplementedError: If `kernel` is neither `'rbf'` nor `'rbf2d'`.
    """
    if D is None:
        D = tf.linspace(start=-2., stop=2., num=20)

    with tf.variable_scope('kaf', reuse=tf.AUTO_REUSE):
        # Each branch only decides the kernel values and the coefficient
        # shape; the variable itself is created once below.
        if kernel == 'rbf':
            kernel_values = gauss_kernel(linear, D, gamma=gamma)
            coeff_shape = (1, linear.get_shape()[-1], D.get_shape()[0])
        elif kernel == 'rbf2d':
            grid_x, grid_y = tf.meshgrid(D, D)
            kernel_values = gauss_kernel2D(linear, grid_x, grid_y, gamma=gamma)
            coeff_shape = (
                1,
                linear.get_shape()[-1] // 2,
                D.get_shape()[0] * D.get_shape()[0],
            )
        else:
            raise NotImplementedError()

        mixing_coeffs = tf.get_variable(
            name,
            shape=coeff_shape,
            initializer=tf.random_normal_initializer(stddev=0.1),
        )
        activation = tf.reduce_sum(
            tf.multiply(kernel_values, mixing_coeffs), axis=-1)
    return activation
def __call__(self, x):
    """Applies this object's kernel expansion to `x`.

    Evaluates `gauss_kernel` on `x` with `self.d` and `self.gamma`, weights
    the result element-wise by `self.alpha` and sums over the last axis.

    Args:
      x: Input passed as the first argument to `gauss_kernel`.

    Returns:
      The weighted kernel values summed over the last axis.
    """
    kernel_values = gauss_kernel(x, self.d, self.gamma)
    weighted = tf.multiply(kernel_values, self.alpha)
    return tf.reduce_sum(weighted, axis=-1)