Ejemplo n.º 1
0
    def predict(self, votes, reviews):
        """ Predicts a set of vote examples using the previously fitted model.

        Observations:
        - A vote whose voter, author or product is missing from the fitted
        maps (cold start) receives the overall mean as prediction.
        - The ratio of cold-start votes is printed to standard output; it is
        reported as 0 when votes is empty.

        Args:
          votes: list of dictionaries, representing votes, to predict
        helpfulness vote value.
          reviews: collection of review dictionaries indexed by the 'review'
        field of each vote; only the 'product' field is read here.

        Returns:
          A list of floats with predicted vote values, in the same order of
        votes.
        """
        pred = []
        cold_start = 0
        for vote in votes:
            voter = vote['voter']
            author = vote['author']
            product = reviews[vote['review']]['product']
            # -1 marks an entity that has no index in the fitted model.
            v = self.voter_map.get(voter, -1)
            a = self.author_map.get(author, -1)
            p = self.product_map.get(product, -1)
            if v == -1 or a == -1 or p == -1:
                # NOTE(review): the fallback is the raw overall mean, while
                # regular predictions go through sigmoid -- confirm both are
                # on the same scale.
                pred.append(self.overall_mean)
                cold_start += 1
                continue
            prediction = self.overall_mean + self.voter_bias[voter] + \
                self.review_a_bias[author] + self.review_p_bias[product] + \
                self.tensor_dot(v, a, p)
            pred.append(sigmoid(prediction))
        # Guard the ratio: an empty vote list used to raise ZeroDivisionError.
        ratio = float(cold_start) / len(votes) if votes else 0.0
        print('-*- Cold-start ratio: %f' % ratio)
        return pred
Ejemplo n.º 2
0
  def get_cond_mean_and_var(self, groups, votes):
    """ Computes the conditional mean and variance of this variable.

        Observations:
        - The rest values of all related votes are summed and scaled by the
        group's var_H.
        - cond_var and var_dot are computed only while they are None and are
        then stored on the instance, so later calls reuse them (the original
        notes say they are reset through the reset_samples function).

        Args:
          groups: a dictionary of Group objects.
          votes: list of votes (training set).

        Returns:
          A 2-tuple with the mean and variance, both float values.
    """
    rest_sum = sum((self.get_rest_value(groups, votes[idx])
        for idx in self.related_votes), 0.0)
    own_group = groups[self.name]
    scaled_rest = rest_sum / own_group.var_H.value
    if self.cond_var is None:
      prior_precision = 1.0 / own_group.var_param.value
      data_precision = float(self.num_votes) / own_group.var_H.value
      self.cond_var = 1.0 / (prior_precision + data_precision)
    if self.var_dot is None:
      regression = own_group.weight_param.value.T.dot(self.features)[0,0]
      self.var_dot = sigmoid(regression) / own_group.var_param.value
    return (scaled_rest + self.var_dot) * self.cond_var, self.cond_var
Ejemplo n.º 3
0
  def get_derivative_2(self, value, variable_group):
    """ Computes the second derivative of the expectation with respect to
        the parameter.

        Observations:
        - The expectation is the expectation of the log-likelihood with
        respect to the latent variables posterior distribution, found in the
        E-step of the EM method.
        - The outer product of each variable's features is stored in its
        feat_matrix field the first time it is needed.

        Args:
          value: value of the parameter to calculate the derivative at this
            point.
          variable_group: variable group whose weight of the regression used for
            calculating the mean of the distribution is represented by this
            parameter.

        Returns:
          The derivative at point value, a square matrix with as many rows as
        value.
    """
    dim = value.shape[0]
    hessian = zeros((dim, dim))
    for var in variable_group.iter_variables():
      feats = var.features
      activation = value.T.dot(feats)[0,0]
      s0 = sigmoid(activation)
      s1 = sigmoid_der1(activation)
      s2 = sigmoid_der2(activation)
      if var.feat_matrix is None:
        var.feat_matrix = feats.dot(feats.T)
      # The variable value is capped at 1.0 before comparing to the sigmoid.
      residual = min(var.value, 1.0) - s0
      hessian += (residual * s2 - s1 * s1) * var.feat_matrix
    hessian *= 1.0 / (variable_group.var_param.value)
    return hessian
 def test_interaction_scalar_variable_get_cond_mean_and_var(self):
     """ Compares get_cond_mean_and_var of the ('a1', 'v1') variable of group
         'lambda' against the mean and variance computed directly here.
     """
     groups = self.groups
     group = groups['lambda']
     matches = []
     for candidate in groups['lambda'].iter_variables():
         if candidate.entity_id == ('a1', 'v1'):
             matches.append(candidate)
     variable = matches[0]
     # Votes used for the hand-computed rest term.
     related_votes = [
         {'review': 'r1', 'author': 'a1', 'voter': 'v1', 'vote': 4},
         {'review': 'r2', 'author': 'a1', 'voter': 'v1', 'vote': 5},
     ]
     data_precision = 2 / self.var_H.value
     prior_precision = 1 / group.var_param.value
     true_var = 1 / (data_precision + prior_precision)
     rest_total = sum(
         [variable.get_rest_value(groups, v) for v in related_votes])
     rest_term = rest_total / self.var_H.value
     regression = group.weight_param.value.T.dot(variable.features)[0, 0]
     dot_term = aux.sigmoid(regression) / group.var_param.value
     true_mean = true_var * (rest_term + dot_term)
     res_mean, res_var = variable.get_cond_mean_and_var(groups, self.votes)
     self.assertAlmostEqual(true_var, res_var)
     self.assertAlmostEqual(true_mean, res_mean)
 def test_interaction_get_der1(self):
   """ Compares get_derivative_1 of the 'lambda' weight parameter against a
       first derivative accumulated here from 10 random samples per variable.
   """
   groups = self.groups
   group = groups['lambda']
   param = group.weight_param
   expected = 0
   for variable in group.iter_variables():
     variable.num_samples = 10
     variable.samples = [random() for _ in xrange(10)]
     feat = variable.features
     dot = param.value.T.dot(feat)[0,0]
     for sample in variable.samples:
       residual = sample - aux.sigmoid(dot)
       expected += residual * aux.sigmoid_der1(dot) * feat
   expected = 1/(group.var_param.value*10) * expected
   actual = param.get_derivative_1(param.value, group)
   ntest.assert_allclose(expected, actual, rtol=1, atol=1e-7)
def likelihood(groups, votes):
  ''' Gets the log-likelihood of the current set of variables and parameters.

      Observation: Auxiliary function for testing.

      Observations:
      - Each vote contributes its squared residual over var_H plus log(var_H),
      where the residual is the vote minus the sum of all latent terms
      (alpha, beta, xi, u'v and, when present, gamma and lambda).
      - Each latent variable contributes its squared distance to its
      regression mean, scaled by the inverse of the group variance, plus the
      log of that variance (log-determinant for array groups).
      - The accumulated sum is multiplied by -0.5 at the end.

      Args:
        groups: dictionary of Group objects indexed by names.
        votes: list of vote dictionaries of the training set.

      Returns:
        The log-likelihood value. NOTE(review): because u'v is a 1x1 array,
        the result is presumably a 1x1 array rather than a plain float --
        confirm against callers.
  '''
  total = 0
  var_H = next(iter(groups.values())).var_H.value
  for vote in votes:
    predicted = groups['alpha'].get_instance(vote).value + \
        groups['beta'].get_instance(vote).value + \
        groups['xi'].get_instance(vote).value + \
        groups['u'].get_instance(vote).value.T \
        .dot(groups['v'].get_instance(vote).value)
    # Interaction terms are part of the prediction, so they are subtracted
    # from the vote like every other term (they used to be added to the
    # residual, i.e. with the wrong sign).
    if groups['gamma'].contains(vote):
      predicted = predicted + groups['gamma'].get_instance(vote).value
    if groups['lambda'].contains(vote):
      predicted = predicted + groups['lambda'].get_instance(vote).value
    residual = vote['vote'] - predicted
    total += residual ** 2 / var_H + log(var_H)
  for group in groups.values():
    if isinstance(group, models.EntityScalarGroup):
      for variable in group.iter_variables():
        total += ((variable.value - \
            group.weight_param.value.T.dot(variable.features)) ** 2) / \
            group.var_param.value
        total += log(group.var_param.value)
    elif isinstance(group, models.EntityArrayGroup):
      for variable in group.iter_variables():
        term = variable.value - group.weight_param.value.dot(variable.features)
        dim = variable.shape[0]
        covar = group.var_param.value * identity(dim)
        # The quadratic form needs the inverse covariance, matching the
        # division by the variance in the scalar branches; covar is
        # diagonal, so its inverse is the identity divided by the variance.
        precision = identity(dim) / group.var_param.value
        total += term.T.dot(precision).dot(term)
        total += log(det(covar))
    else:
      for variable in group.iter_variables():
        total += ((variable.value - \
            aux.sigmoid(group.weight_param.value.T.dot(variable.features))) \
            ** 2) / group.var_param.value
        total += log(group.var_param.value)
  # A float literal is required: under Python 2 integer division "- 1/2"
  # evaluates to -1, not -0.5.
  total *= -0.5
  return total
 def test_interaction_scalar_variable_get_cond_mean_and_var(self):
   """ Compares get_cond_mean_and_var of the ('a1', 'v1') variable of group
       'lambda' against the mean and variance computed directly here.
   """
   groups = self.groups
   group = groups['lambda']
   found = []
   for candidate in groups['lambda'].iter_variables():
     if candidate.entity_id == ('a1', 'v1'):
       found.append(candidate)
   variable = found[0]
   # Votes used for the hand-computed rest term.
   related_votes = [
       {
           'review': 'r1',
           'author': 'a1',
           'voter': 'v1',
           'vote': 4
       },
       {
           'review': 'r2',
           'author': 'a1',
           'voter': 'v1',
           'vote': 5
       },
   ]
   votes_precision = 2 / self.var_H.value
   param_precision = 1 / group.var_param.value
   true_var = 1 / (votes_precision + param_precision)
   rest_values = [variable.get_rest_value(groups, v) for v in related_votes]
   rest_term = sum(rest_values) / self.var_H.value
   linear = group.weight_param.value.T.dot(variable.features)[0,0]
   dot_term = aux.sigmoid(linear) / group.var_param.value
   true_mean = true_var * (rest_term + dot_term)
   res_mean, res_var = variable.get_cond_mean_and_var(groups, self.votes)
   self.assertAlmostEqual(true_var, res_var)
   self.assertAlmostEqual(true_mean, res_mean)
 def test_interaction_get_der1(self):
     """ Compares get_derivative_1 of the 'lambda' weight parameter against a
         first derivative accumulated here from 10 random samples per
         variable.
     """
     groups = self.groups
     group = groups['lambda']
     param = group.weight_param
     accumulated = 0
     for variable in group.iter_variables():
         variable.num_samples = 10
         variable.samples = [random() for _ in xrange(10)]
         feat = variable.features
         dot = param.value.T.dot(feat)[0, 0]
         for sample in variable.samples:
             gap = sample - aux.sigmoid(dot)
             accumulated += gap * aux.sigmoid_der1(dot) * feat
     scale = 1 / (group.var_param.value * 10)
     accumulated = scale * accumulated
     computed = param.get_derivative_1(param.value, group)
     ntest.assert_allclose(accumulated, computed, rtol=1, atol=1e-7)
 def test_e_step(self):
   ''' Runs two EM iterations and checks that every M-step brings the
       regression means closer to the sampled latent values.

       Because of Gibbs Sampling, there is no guarantee that EM iterations will
       always improve the likelihood. But we assume that the first iteration
       will in most cases.

       Observations:
       - For every group, the distance between one latent variable and its
       regression mean must not grow when the weights from before the M-step
       are replaced by the weights from after it.
       - For the interaction groups (gamma, lambda) the regression mean is the
       sigmoid of the linear score, for both old and new weights.
       - The likelihood with the new gamma weights must be at least the
       likelihood with the old gamma weights (compared at 2 decimals).
   '''
   def linear_gap(variable, weights):
     # Distance between a scalar variable and its linear regression mean.
     return abs(variable.value - weights.T.dot(variable.features)[0,0])

   def sigmoid_gap(variable, weights):
     # Distance between an interaction variable and its sigmoid mean.
     return abs(variable.value -
         aux.sigmoid(weights.T.dot(variable.features)[0,0]))

   def array_gap(variable, weights):
     # Summed absolute distance between an array variable and its mean.
     return sum(absolute(variable.value - weights.dot(variable.features)))

   for _ in xrange(2):
     vote = self.votes[0]
     em.perform_e_step(self.groups, self.votes, 10, 0)
     beta = self.groups['beta'].get_instance(vote)
     alpha = next(self.groups['alpha'].iter_variables())
     xi = next(self.groups['xi'].iter_variables())
     gamma = next(self.groups['gamma'].iter_variables())
     lambd = next(self.groups['lambda'].iter_variables())
     u = next(self.groups['u'].iter_variables())
     v = self.groups['v'].get_instance(vote)
     # Weights before the M-step.
     g_0 = self.groups['beta'].weight_param.value
     d_0 = self.groups['alpha'].weight_param.value
     b_0 = self.groups['xi'].weight_param.value
     r_0 = self.groups['gamma'].weight_param.value
     h_0 = self.groups['lambda'].weight_param.value
     W_0 = self.groups['u'].weight_param.value
     V_0 = self.groups['v'].weight_param.value
     em.perform_m_step(self.groups, self.votes)
     # Weights after the M-step.
     g_n = self.groups['beta'].weight_param.value
     d_n = self.groups['alpha'].weight_param.value
     b_n = self.groups['xi'].weight_param.value
     r_n = self.groups['gamma'].weight_param.value
     h_n = self.groups['lambda'].weight_param.value
     W_n = self.groups['u'].weight_param.value
     V_n = self.groups['v'].weight_param.value
     self.assertGreaterEqual(linear_gap(beta, g_0), linear_gap(beta, g_n))
     self.assertGreaterEqual(linear_gap(alpha, d_0), linear_gap(alpha, d_n))
     self.assertGreaterEqual(linear_gap(xi, b_0), linear_gap(xi, b_n))
     # Both sides go through the sigmoid: the old-weights side used to be
     # compared as a raw linear score against a sigmoid score.
     self.assertGreaterEqual(sigmoid_gap(gamma, r_0), sigmoid_gap(gamma, r_n))
     new_likelihood = likelihood(self.groups, self.votes)
     # Temporarily roll gamma back to its old weights to get the reference
     # likelihood, restoring the new weights before asserting.
     self.groups['gamma'].weight_param.value = r_0
     rolled_back_likelihood = likelihood(self.groups, self.votes)
     self.groups['gamma'].weight_param.value = r_n
     self.assertGreaterEqual(round(new_likelihood, 2),
         round(rolled_back_likelihood, 2))
     self.assertGreaterEqual(sigmoid_gap(lambd, h_0), sigmoid_gap(lambd, h_n))
     self.assertGreaterEqual(array_gap(u, W_0), array_gap(u, W_n))
     self.assertGreaterEqual(array_gap(v, V_0), array_gap(v, V_n))
 def _create_groups(self):
   ''' Creates the seven variable groups (alpha, beta, xi, u, v, gamma and
       lambda), fills them with one instance per entity, and mirrors groups,
       variables and parameters into the plain dictionaries d_groups, d_vars
       and d_params.
   '''
   # One row per group, in creation order: name, group class, arguments
   # placed between the name and the parameters, weight parameter class and
   # arguments, variance parameter class and name, entities to add.
   specs = (
       ('alpha', models.EntityScalarGroup, ('voter',),
        models.EntityScalarParameter, ('d', (9,1)),
        models.ScalarVarianceParameter, 'var_alpha', self.voters),
       ('beta', models.EntityScalarGroup, ('review',),
        models.EntityScalarParameter, ('g', (17,1)),
        models.ScalarVarianceParameter, 'var_beta', self.reviews),
       ('xi', models.EntityScalarGroup, ('author',),
        models.EntityScalarParameter, ('b', (5,1)),
        models.ScalarVarianceParameter, 'var_xi', self.authors),
       ('u', models.EntityArrayGroup, ((const.K, 1), 'voter'),
        models.EntityArrayParameter, ('W', (const.K, 9)),
        models.ArrayVarianceParameter, 'var_u', self.voters),
       ('v', models.EntityArrayGroup, ((const.K, 1), 'review'),
        models.EntityArrayParameter, ('V', (const.K,17)),
        models.ArrayVarianceParameter, 'var_v', self.reviews),
       ('gamma', models.InteractionScalarGroup, (('author', 'voter'),),
        models.InteractionScalarParameter, ('r', (7, 1)),
        models.ScalarVarianceParameter, 'var_gamma', self.sim),
       ('lambda', models.InteractionScalarGroup, (('author', 'voter'),),
        models.InteractionScalarParameter, ('h', (4,1)),
        models.ScalarVarianceParameter, 'var_lambda', self.conn),
   )
   for (name, group_cls, lead_args, weight_cls, weight_args, var_cls,
       var_name, entities) in specs:
     weight = weight_cls(*weight_args)
     variance = var_cls(var_name)
     self.groups[name] = group_cls(name,
         *(lead_args + (weight, variance, self.var_H)))
     for e_id, e_feat in entities.iteritems():
       self.groups[name].add_instance(e_id, e_feat, self.votes)
   self.groups['u'].set_pair_name('v')
   self.groups['v'].set_pair_name('u')

   # Dictionary view of each group.
   for g_id, group in self.groups.iteritems():
     self.d_groups[g_id] = {
         '_id': str(g_id),
         'pair_name': group.pair_name,
         'size': group.size,
         'entity_type': group.e_type,
         'shape': group.shape,
     }

   # Dictionary view of each variable, indexed by group name and entity id.
   for group in self.groups.itervalues():
     self.d_vars[group.name] = {}
     for variable in group.iter_variables():
       pairwise = isinstance(variable, models.InteractionScalarVariable)
       related = []
       for idx, vote in enumerate(self.votes):
         if pairwise:
           hit = vote[variable.e_type[0]] == variable.entity_id[0] and \
               vote[variable.e_type[1]] == variable.entity_id[1]
         else:
           hit = vote[variable.e_type] == variable.entity_id
         if hit:
           related.append(idx)
       d_var = {
           'related_votes': related,
           'entity_id': variable.entity_id,
           'num_votes': len(related),
       }
       if isinstance(variable, models.ScalarVariable):
         d_var['cond_var'] = 1.0 / (1.0 / group.var_param.value + \
             float(d_var['num_votes']) / group.var_H.value)
       if isinstance(variable, models.EntityScalarVariable):
         linear = group.weight_param.value.T.dot(variable.features)[0,0]
         d_var['type'] = 'EntityScalar'
         d_var['var_dot'] = linear / group.var_param.value
       elif isinstance(variable, models.InteractionScalarVariable):
         linear = group.weight_param.value.T.dot(variable.features)[0,0]
         d_var['type'] = 'InteractionScalar'
         d_var['var_dot'] = aux.sigmoid(linear) / group.var_param.value
       else:
         inv_var = pinv(group.var_param.value * identity(const.K))
         d_var['type'] = 'EntityArray'
         d_var['inv_var'] = inv_var
         d_var['var_dot'] = inv_var.dot(group.weight_param.value) \
             .dot(variable.features)
         d_var['last_matrix'] = variable.value.dot(variable.value.T)
       d_var['group'] = variable.name
       d_var['last_sample'] = variable.value
       d_var['samples'] = []
       self.d_vars[group.name][variable.entity_id] = d_var

   # Dictionary view of each group's parameters.
   for group in self.groups.itervalues():
     self.d_params[group.name] = {
         '_id': group.name,
         'weight': group.weight_param.value,
         'var': group.var_param.value,
         'var_H': group.var_H.value,
     }
Ejemplo n.º 11
0
def calculate_predictions(groups, test, users, trusts, features, sim, conn):
  """ Computes one prediction per test vote from the fitted groups. When a
      group contains the vote, the latent variable value is used; otherwise
      the term is approximated by regression over the vote's features
      (passed through sigmoid for the interaction terms gamma and lambda).

      Observations:
      - The gamma term is only considered when the author is among the
      voter's 'similars' and the (author, voter) pair is in sim; the lambda
      term only when the author is among the voter's entries in trusts and the
      pair is in conn. Otherwise the term is 0.
      - features['sim'] and features['conn'] are consumed sequentially, one
      entry for each vote that satisfies the corresponding condition.

      Args:
        groups: dictionary of Group objects.
        test: list of vote dictionaries on test set.
        users: dictionary of user dictionaries.
        trusts: networkx DiGraph with trust network.
        features: dictionary of lists of feature arrays, indexed by entity or
      interaction id; 'voter', 'review' and 'author' lists are indexed by
      the position of the vote in test.
        sim: dictionary of similarity of users dictionaries.
        conn: dictionary of connection of users dictionaries.

      Returns:
        A list of floats containing prediction values for each vote in test, in
      the same order.
  """
  def as_column(feat):
    # Reshapes a feature array into a column vector.
    return feat.reshape((feat.size, 1))

  predictions = []
  sim_pos = 0
  conn_pos = 0
  for idx, vote in enumerate(test):
    voter_feat = as_column(features['voter'][idx])
    if groups['alpha'].contains(vote):
      alfa = groups['alpha'].get_instance(vote).value
    else:
      alfa = groups['alpha'].weight_param.value.T.dot(voter_feat)[0,0]
    if groups['u'].contains(vote):
      u = groups['u'].get_instance(vote).value
    else:
      u = groups['u'].weight_param.value.dot(voter_feat)

    review_feat = as_column(features['review'][idx])
    if groups['beta'].contains(vote):
      beta = groups['beta'].get_instance(vote).value
    else:
      beta = groups['beta'].weight_param.value.T.dot(review_feat)[0,0]
    if groups['v'].contains(vote):
      v = groups['v'].get_instance(vote).value
    else:
      v = groups['v'].weight_param.value.dot(review_feat)

    author_feat = as_column(features['author'][idx])
    if groups['xi'].contains(vote):
      xi = groups['xi'].get_instance(vote).value
    else:
      xi = groups['xi'].weight_param.value.T.dot(author_feat)[0,0]

    a_id = vote['author']
    v_id = vote['voter']
    gamma = 0.0
    if v_id in users and a_id in users[v_id]['similars'] and (a_id, v_id) in sim:
      sim_feat = as_column(features['sim'][sim_pos])
      if groups['gamma'].contains(vote):
        gamma = groups['gamma'].get_instance(vote).value
      else:
        gamma = sigmoid(groups['gamma'].weight_param.value.T.dot(sim_feat)[0,0])
      sim_pos += 1
    lambd = 0.0
    if v_id in trusts and a_id in trusts[v_id] and (a_id, v_id) in conn:
      conn_feat = as_column(features['conn'][conn_pos])
      if groups['lambda'].contains(vote):
        lambd = groups['lambda'].get_instance(vote).value
      else:
        lambd = sigmoid(groups['lambda'].weight_param.value.T.dot(conn_feat)[0,0])
      conn_pos += 1
    predictions.append(u.T.dot(v)[0,0] + alfa + beta + xi + gamma + lambd)
  return predictions
Ejemplo n.º 12
0
def calculate_predictions(groups, test, users, trusts, features, sim, conn):
    """ Computes one prediction per test vote from the fitted groups. When a
      group contains the vote, the latent variable value is used; otherwise
      the term is approximated by regression over the vote's features
      (passed through sigmoid for the interaction terms gamma and lambda).

      Observations:
      - The gamma term is only considered when the author is among the
      voter's 'similars' and the (author, voter) pair is in sim; the lambda
      term only when the author is among the voter's entries in trusts and the
      pair is in conn. Otherwise the term is 0.
      - features['sim'] and features['conn'] are consumed sequentially, one
      entry for each vote that satisfies the corresponding condition.

      Args:
        groups: dictionary of Group objects.
        test: list of vote dictionaries on test set.
        users: dictionary of user dictionaries.
        trusts: networkx DiGraph with trust network.
        features: dictionary of lists of feature arrays, indexed by entity or
      interaction id; 'voter', 'review' and 'author' lists are indexed by
      the position of the vote in test.
        sim: dictionary of similarity of users dictionaries.
        conn: dictionary of connection of users dictionaries.

      Returns:
        A list of floats containing prediction values for each vote in test, in
      the same order.
    """
    def entity_term(name, vote, feat, scalar):
        # Latent value when the group knows the vote, regression otherwise.
        group = groups[name]
        if group.contains(vote):
            return group.get_instance(vote).value
        weights = group.weight_param.value
        if scalar:
            return weights.T.dot(feat)[0,0]
        return weights.dot(feat)

    def interaction_term(name, vote, feat):
        # Same as entity_term, with the regression squashed by sigmoid.
        group = groups[name]
        if group.contains(vote):
            return group.get_instance(vote).value
        return sigmoid(group.weight_param.value.T.dot(feat)[0,0])

    results = []
    next_sim = 0
    next_conn = 0
    for position, vote in enumerate(test):
        feat = features['voter'][position]
        feat = feat.reshape((feat.size, 1))
        alfa = entity_term('alpha', vote, feat, True)
        u = entity_term('u', vote, feat, False)

        feat = features['review'][position]
        feat = feat.reshape((feat.size, 1))
        beta = entity_term('beta', vote, feat, True)
        v = entity_term('v', vote, feat, False)

        feat = features['author'][position]
        feat = feat.reshape((feat.size, 1))
        xi = entity_term('xi', vote, feat, True)

        a_id, v_id = vote['author'], vote['voter']
        gamma = 0.0
        if v_id in users and a_id in users[v_id]['similars'] and \
                (a_id, v_id) in sim:
            feat = features['sim'][next_sim]
            feat = feat.reshape((feat.size, 1))
            gamma = interaction_term('gamma', vote, feat)
            next_sim += 1
        lambd = 0.0
        if v_id in trusts and a_id in trusts[v_id] and (a_id, v_id) in conn:
            feat = features['conn'][next_conn]
            feat = feat.reshape((feat.size, 1))
            lambd = interaction_term('lambda', vote, feat)
            next_conn += 1
        score = u.T.dot(v)[0, 0] + alfa + beta + xi + gamma + lambd
        results.append(score)
    return results
Ejemplo n.º 13
0
    def fit(self, votes, reviews_dict):
        """ Fits a TF model given training set (votes).

        Observations:
        - Each iteration performs a stochastic gradient pass over the votes
        (updating V, A, P and S) and another over the reviews referenced by
        those votes (updating A and P), then shrinks V, A, P and S by the
        regularization factor.
        - Training stops after _ITER iterations, or as soon as the regularized
        error changes by less than _TOL between consecutive iterations.
        - Progress is printed to standard output.

        Args:
          votes: list of votes, represented as dictionaries (training set).
          reviews_dict: dictionary of reviews, indexed by review id.

        Returns:
          None. Instance fields are updated.
        """
        votes = votes[:]  # shallow copy: shuffling must not reorder the caller's list
        self._initialize_matrices(votes, reviews_dict)
        self._calculate_vote_bias(votes, reviews_dict)
        self._calculate_rating_bias(reviews_dict)
        shuffle(votes)
        # Only reviews that received at least one training vote are used.
        review_ids = set([vote['review'] for vote in votes])
        reviews = [reviews_dict[r_id] for r_id in review_ids]
        shuffle(reviews)

        def vote_terms(vote):
            # Indices of voter, author and product, plus the raw (pre-sigmoid)
            # score of the vote under the current model.
            voter = vote['voter']
            author = vote['author']
            product = reviews_dict[vote['review']]['product']
            v = self.voter_map[voter]
            a = self.author_map[author]
            p = self.product_map[product]
            score = self.overall_mean + self.voter_bias[voter] + \
                self.review_a_bias[author] + self.review_p_bias[product] + \
                self.tensor_dot(v, a, p)
            return v, a, p, score

        def review_terms(review):
            # Indices of author and product, plus the raw (pre-sigmoid) score
            # of the review rating under the current model.
            author = review['author']
            product = review['product']
            a = self.author_map[author]
            p = self.product_map[product]
            score = self.rating_avg + self.author_bias[author] + \
                self.product_bias[product] + self.A[a,:].dot(self.P[p,:])
            return a, p, score

        previous = float('inf')
        alpha = _ALPHA
        for it in xrange(_ITER):
            # NOTE(review): the step size is divided cumulatively, so it
            # decays as _ALPHA / sqrt((it + 1)!) -- confirm this is intended
            # rather than _ALPHA / sqrt(it + 1).
            alpha = alpha / sqrt(it + 1)
            print('Iteration %d' % it)
            for vote in votes:
                v, a, p, pred = vote_terms(vote)
                error = sigmoid(pred) - vote['vote']
                der_sig = sigmoid_der1(pred)
                # All four updates are computed from the current values
                # before any of them is written back.
                new_V = self.V[v,:] - alpha * (error * der_sig * \
                    self.tensor_dot_der_v(a, p))
                new_A = self.A[a,:] - alpha * (error * der_sig * \
                    self.tensor_dot_der_a(v, p))
                new_P = self.P[p,:] - alpha * (error * der_sig * \
                    self.tensor_dot_der_p(v, a))
                new_S = self.S - alpha * (error * der_sig * \
                    self.tensor_dot_der_s(v, a, p))
                self.V[v, :] = new_V
                self.A[a, :] = new_A
                self.P[p, :] = new_P
                self.S = new_S
            for review in reviews:
                a, p, pred = review_terms(review)
                error = sigmoid(pred) - review['rating']
                der_sig = sigmoid_der1(pred)
                new_A = self.A[a, :] - alpha * (error * der_sig * self.P[p, :])
                new_P = self.P[p, :] - alpha * (error * der_sig * self.A[a, :])
                self.A[a, :] = new_A
                self.P[p, :] = new_P
            # Regularization step, applied once per iteration.
            self.V -= alpha * _BETA * self.V
            self.A -= alpha * _BETA * self.A
            self.P -= alpha * _BETA * self.P
            self.S -= alpha * _BETA * self.S

            # Regularized objective, used for the convergence check.
            value = 0.0
            for vote in votes:
                pred = vote_terms(vote)[3]
                value += (vote['vote'] -
                          sigmoid(pred))**2  # normalized in (0,1)
            for review in reviews:
                pred = review_terms(review)[2]
                value += (review['rating'] -
                          sigmoid(pred))**2  # normalized in (0,1)
            sse = value
            for v in self.voter_map.itervalues():
                for k in xrange(_K):
                    value += _BETA * self.V[v, k]**2
            for a in self.author_map.itervalues():
                for k in xrange(_K):
                    value += _BETA * self.A[a, k]**2
            for p in self.product_map.itervalues():
                for k in xrange(_K):
                    value += _BETA * self.P[p, k]**2
            for i in xrange(_K):
                for j in xrange(_K):
                    for k in xrange(_K):
                        value += _BETA * self.S[i, j, k]**2
            value /= 2.0
            print('- Error: %f' % value)
            # NOTE(review): sse also includes the review rating errors but is
            # averaged over the number of votes only -- confirm.
            print('- Average normalized RMSE: %f' % sqrt(sse / len(votes)))
            if abs(previous - value) < _TOL:
                # Report the iteration counter; the message used to print the
                # leftover index of the regularization loops.
                print('-*- Convergence after %d iterations' % (it + 1))
                break
            previous = value
Ejemplo n.º 14
0
  def fit(self, X, y, qid):
    """ Fits a model given training set (votes).

        Observations:
        - Rows are grouped by query id; candidate pairs are the pairs of rows
        of a same query in which at least one row has a response among the
        two highest distinct responses of that query.
        - Each of the _ITER iterations performs one stochastic gradient step
        combining an eps-insensitive term for two rows (weighted by _GAMMA)
        and a pairwise sigmoid term (weighted by 1 - _GAMMA).
        - The intercept w[0] is not regularized; the remaining weights are
        shrunk by _BETA at every step.
        - The regularized objective is printed ten times during training (at
        every iteration when _ITER is smaller than 10).

        Args:
          X: numpy array with feature arrays in rows (training set).
          y: list of responses, in the same order of X.
          qid: list of query ids (associated to each reader-product pair), in
            the same order of X.

        Returns:
          None. Instance fields are updated.
    """
    X = self._initialize_coef(X)
    m, _ = shape(X)
    rows_by_qid = {}
    for row in xrange(m):
      rows_by_qid.setdefault(qid[row], []).append(row)
    pairs = []
    for query in rows_by_qid:
      rows = rows_by_qid[query]
      max_one = max([y[i] for i in rows])
      rest = [y[i] for i in rows if y[i] < max_one]
      max_two = max(rest) if len(rest) > 0 else max_one
      for i in rows:
        for j in rows:
          if i < j and (y[i] >= max_two or y[j] >= max_two):
            pairs.append((i, j))
    p = len(pairs)
    shuffle(pairs)
    t = 1.0
    # At least 1, so the modulo below never divides by zero when _ITER < 10.
    check = max(1, _ITER // 10)
    for it in xrange(_ITER):
      alpha = _ALPHA / pow(t, _POW)
      grad = zeros(self.w.shape)
      p_index = random_integers(0, p-1)
      i, j = pairs[p_index]
      # NOTE(review): the first element of the sampled pair is replaced by a
      # uniformly random row, so only j comes from the pair -- confirm this
      # is intended.
      i = random_integers(0, m-1)
      # Eps-insensitive term for each of the two rows, weighted by response.
      for row in (i, j):
        gap = y[row] - self.w.dot(X[row])
        if y[row] != 0:
          if gap > _EPS:
            grad += - (_GAMMA) * X[row,:] * y[row]
          elif gap < - _EPS:
            grad += (_GAMMA) * X[row,:] * y[row]
      # Pairwise term: the target rescales 2**y[i] - 2**y[j] by (31 + d) / 62
      # (presumably for responses between 0 and 5 -- confirm). The float
      # divisor avoids Python 2 integer division, which truncated the target
      # to 0 or 1 for integer responses.
      x = X[i] - X[j]
      dot = self.w.dot(x)
      delta = 2**y[i] - 2**y[j]
      target = (31 + delta) / 62.0
      if y[i] != 0 and y[j] != 0:
        grad += (1 - _GAMMA) * (sigmoid(dot) - target) * x * target
      self.w[0] -= alpha * grad[0]
      self.w[1:] = max(0.0, 1.0 - alpha * _BETA) * self.w[1:] - alpha * \
            grad[1:]
      t += 1.0
      if (it + 1) % check == 0:
        shuffle(pairs)
        value = 0.0
        for row in xrange(m):
          hyp = self.w.dot(X[row,:])
          value += max(abs(hyp - float(y[row])) - _EPS, 0) * y[row]
        value += 0.5 * _BETA * (self.w[1:].dot(self.w[1:])) # L2 norm
        value /= m
        print('Obj. Fun. (Reg) on iteration %d: %f' % (it, value))
Ejemplo n.º 15
0
    def fit(self, X, y, qid):
        """ Fits a model given training set (votes).

        Observations:
        - Rows are grouped by query id; candidate pairs are the pairs of rows
        of a same query in which at least one row has a response among the
        two highest distinct responses of that query.
        - Each of the _ITER iterations performs one stochastic gradient step
        combining an eps-insensitive term for two rows (weighted by _GAMMA)
        and a pairwise sigmoid term (weighted by 1 - _GAMMA).
        - The intercept w[0] is not regularized; the remaining weights are
        shrunk by _BETA at every step.
        - The regularized objective is printed ten times during training (at
        every iteration when _ITER is smaller than 10).

        Args:
          X: numpy array with feature arrays in rows (training set).
          y: list of responses, in the same order of X.
          qid: list of query ids (associated to each reader-product pair), in
            the same order of X.

        Returns:
          None. Instance fields are updated.
        """
        X = self._initialize_coef(X)
        m, _ = shape(X)
        rows_by_qid = {}
        for row in xrange(m):
            rows_by_qid.setdefault(qid[row], []).append(row)
        pairs = []
        for query in rows_by_qid:
            rows = rows_by_qid[query]
            max_one = max([y[i] for i in rows])
            rest = [y[i] for i in rows if y[i] < max_one]
            max_two = max(rest) if len(rest) > 0 else max_one
            for i in rows:
                for j in rows:
                    if i < j and (y[i] >= max_two or y[j] >= max_two):
                        pairs.append((i, j))
        p = len(pairs)
        shuffle(pairs)
        t = 1.0
        # At least 1, so the modulo below never divides by zero when
        # _ITER < 10.
        check = max(1, _ITER // 10)
        for it in xrange(_ITER):
            alpha = _ALPHA / pow(t, _POW)
            grad = zeros(self.w.shape)
            p_index = random_integers(0, p - 1)
            i, j = pairs[p_index]
            # NOTE(review): the first element of the sampled pair is replaced
            # by a uniformly random row, so only j comes from the pair --
            # confirm this is intended.
            i = random_integers(0, m - 1)
            # Eps-insensitive term for each of the two rows, weighted by
            # response.
            for row in (i, j):
                gap = y[row] - self.w.dot(X[row])
                if y[row] != 0:
                    if gap > _EPS:
                        grad += -(_GAMMA) * X[row, :] * y[row]
                    elif gap < -_EPS:
                        grad += (_GAMMA) * X[row, :] * y[row]
            # Pairwise term: the target rescales 2**y[i] - 2**y[j] by
            # (31 + d) / 62 (presumably for responses between 0 and 5 --
            # confirm). The float divisor avoids Python 2 integer division,
            # which truncated the target to 0 or 1 for integer responses.
            x = X[i] - X[j]
            dot = self.w.dot(x)
            delta = 2**y[i] - 2**y[j]
            target = (31 + delta) / 62.0
            if y[i] != 0 and y[j] != 0:
                grad += (1 - _GAMMA) * (sigmoid(dot) - target) * x * target
            self.w[0] -= alpha * grad[0]
            self.w[1:] = max(0.0, 1.0 - alpha * _BETA) * self.w[1:] - alpha * \
                  grad[1:]
            t += 1.0
            if (it + 1) % check == 0:
                shuffle(pairs)
                value = 0.0
                for row in xrange(m):
                    hyp = self.w.dot(X[row, :])
                    value += max(abs(hyp - float(y[row])) - _EPS, 0) * y[row]
                value += 0.5 * _BETA * (self.w[1:].dot(self.w[1:]))  # L2 norm
                value /= m
                print('Obj. Fun. (Reg) on iteration %d: %f' % (it, value))