def score_documents_tf(user_obs,
                       doc_obs,
                       no_click_mass=1.0,
                       is_mnl=False,
                       min_normalizer=-1.0):
    """Scores every candidate document against the user, plus a no-click option.

    Each document score is the inner product of the user observation with that
    document's observation. The no-click mass is appended as one extra entry,
    and the combined vector is then either passed through a softmax
    (multinomial logit model) or shifted by `min_normalizer` (multinomial
    proportional model).

    Args:
      user_obs: Tensor-like user observation; it is reshaped to a single row
        before being multiplied with `doc_obs`.
      doc_obs: Tensor-like observations of the candidate documents. Presumably
        shaped [num_docs, dim] so that it broadcasts against the user row and
        is summed along axis 1 -- confirm against callers.
      no_click_mass: A float score assigned to the "pick no document" option.
      is_mnl: Python truth value. If true, a softmax is applied over the
        document scores and the no-click score together; otherwise
        `min_normalizer` is subtracted from every entry.
      min_normalizer: A float (expected <= 0) subtracted from all scores in the
        proportional model so that they become positive.

    Returns:
      A pair `(doc_scores, no_click_score)`: all entries of the combined score
      vector except the last, and the last entry (the no-click option).
    """
    user_row = tf.reshape(user_obs, [1, -1])
    weighted = tf.multiply(user_row, doc_obs)
    doc_scores = tf.reduce_sum(input_tensor=weighted, axis=1)

    # The no-click option rides along as the final element of the vector.
    no_click = tf.constant([no_click_mass])
    combined = tf.concat([doc_scores, no_click], axis=0)

    combined = (tf.nn.softmax(combined) if is_mnl
                else combined - min_normalizer)
    return combined[:-1], combined[-1]
  def projection_dist(states):
    """Returns the goal-direction projection of the move minus its length.

    Reads `starting_states` and `goals` from the enclosing scope. With
    state_offset = states - starting_states and
    goal_offset = goals - starting_states, the result is

        sum(state_offset * goal_offset, axis=-1) / ||goal_offset||_2
          - ||state_offset||_2

    Args:
      states: Tensor of states, broadcast-compatible with `starting_states`.

    Returns:
      A tensor holding the value above.
    """
    goal_offset = goals - starting_states
    state_offset = states - starting_states

    # Dot product of the two offsets, reduced over the last axis only.
    dot = tf.reduce_sum(tf.multiply(state_offset, goal_offset), -1)

    # NOTE(review): tf.norm is called without `axis`, so it reduces over every
    # element of its input, whereas `dot` is reduced over the last axis only.
    # If these tensors carry a batch dimension the norms mix all samples --
    # confirm this is intended.
    projection = tf.math.divide(dot, tf.norm(goal_offset, ord=2))
    displacement = tf.norm(state_offset, 2)

    return -1 * displacement + projection
  def normalized_dist(states):
    """Returns sqrt(epsilon + |s * p**2 - alpha * (d**2 - p**2)|).

    Reads `starting_states`, `goals`, `epsilon` and `alpha` from the enclosing
    scope. With state_offset = states - starting_states and
    goal_offset = goals - starting_states:

      dot = sum(state_offset * goal_offset, axis=-1)
      p   = dot / ||goal_offset||_2
      s   = sign(dot)
      d   = ||state_offset||_2

    For a single (unbatched) vector, d**2 - p**2 is the squared component of
    the move perpendicular to the goal direction.

    Args:
      states: Tensor of states, broadcast-compatible with `starting_states`.

    Returns:
      A tensor holding the value above.
    """
    goal_offset = goals - starting_states
    state_offset = states - starting_states
    dot = tf.reduce_sum(tf.multiply(state_offset, goal_offset), -1)

    # NOTE(review): both tf.norm calls omit `axis` and therefore reduce over
    # all elements, unlike `dot` -- confirm for batched inputs.
    projection_sq = tf.square(
        tf.math.divide(dot, tf.norm(goal_offset, ord=2)))
    signed_projection_sq = tf.sign(dot) * projection_sq
    displacement_sq = tf.square(tf.norm(state_offset, 2))

    off_axis_sq = displacement_sq - projection_sq
    return tf.sqrt(
        epsilon + tf.abs(signed_projection_sq - alpha * off_axis_sq))
Esempio n. 4
0
def kaf(linear, name, kernel='rbf', D=None, gamma=None):
    """Applies a kernel activation with trainable mixing coefficients.

    Kernel values are computed between `linear` and the dictionary `D`, then
    multiplied elementwise by a coefficient variable and summed over the last
    axis. The coefficient variable is created (or reused) under the variable
    scope 'kaf' with a random-normal initializer (stddev 0.1).

    Args:
      linear: Input tensor; the size of its last dimension determines the
        coefficient shape.
      name: Name of the coefficient variable passed to `tf.get_variable`.
      kernel: 'rbf' to use `gauss_kernel` on `D`, or 'rbf2d' to use
        `gauss_kernel2D` on the meshgrid of `D` with itself.
      D: Dictionary tensor. Defaults to 20 evenly spaced values in [-2, 2].
      gamma: Forwarded unchanged to the kernel helper.

    Returns:
      The kernel values weighted by the coefficients and summed over the last
      axis.

    Raises:
      NotImplementedError: If `kernel` is neither 'rbf' nor 'rbf2d'.
    """
    if D is None:
        D = tf.linspace(start=-2., stop=2., num=20)

    with tf.variable_scope('kaf', reuse=tf.AUTO_REUSE):
        if kernel == 'rbf':
            kernel_values = gauss_kernel(linear, D, gamma=gamma)
            coeff_shape = (1, linear.get_shape()[-1], D.get_shape()[0])
        elif kernel == 'rbf2d':
            grid_x, grid_y = tf.meshgrid(D, D)
            kernel_values = gauss_kernel2D(linear, grid_x, grid_y, gamma=gamma)
            # Half as many units, each with a squared dictionary size;
            # presumably features are consumed in pairs -- confirm against
            # gauss_kernel2D.
            dict_size = D.get_shape()[0]
            coeff_shape = (1, linear.get_shape()[-1] // 2,
                           dict_size * dict_size)
        else:
            raise NotImplementedError()

        coeffs = tf.get_variable(
            name,
            shape=coeff_shape,
            initializer=tf.random_normal_initializer(stddev=0.1))
        return tf.reduce_sum(tf.multiply(kernel_values, coeffs), axis=-1)
Esempio n. 5
0
    def __call__(self, x):
        """Evaluates the kernel activation on `x`.

        Computes `gauss_kernel(x, self.d, self.gamma)`, multiplies the result
        elementwise by `self.alpha`, and sums over the last axis.
        """
        kernel_values = gauss_kernel(x, self.d, self.gamma)
        weighted = tf.multiply(kernel_values, self.alpha)
        return tf.reduce_sum(weighted, axis=-1)