Esempio n. 1
0
 def _get_embedded_sampled(self, embs_cat, embs_mulhot, b_cat, b_mulhot, 
   mappings, n_sampled, attributes, device='/gpu:0'):
   """Embed the pre-sampled items from precomputed index mappings.

   Returns (cat_list, mulhot_list, bias); bias is None when neither bias
   table is supplied.
   """
   cat_indices, mulhot_indices, mulhot_segids, mulhot_lengths = mappings
   cat_list = []
   mulhot_list = []
   biases = []  # categorical biases first, then multi-hot biases
   with tf.device(device):
     for f in xrange(attributes.num_features_cat):
       cat_list.append(lookup(embs_cat[f], cat_indices[f]))
       if b_cat is not None:
         biases.append(lookup(b_cat[f], cat_indices[f]))
     for f in xrange(attributes.num_features_mulhot):
       # Only the first sampled_mulhot_l[f] entries of the flattened
       # index/segid tensors are valid.
       valid = self.sampled_mulhot_l[f]
       flat_inds = tf.slice(mulhot_indices[f], [0], [valid])
       segids = tf.slice(mulhot_segids[f], [0], [valid])
       summed = tf.unsorted_segment_sum(
         lookup(embs_mulhot[f], flat_inds), segids, n_sampled)
       mulhot_list.append(tf.div(summed, mulhot_lengths[f]))
       if b_mulhot is not None:
         b_summed = tf.unsorted_segment_sum(
           lookup(b_mulhot[f], flat_inds), segids, n_sampled)
         biases.append(tf.div(b_summed, mulhot_lengths[f]))
     if b_cat is None and b_mulhot is None:
       bias = None
     else:
       bias = tf.squeeze(tf.reduce_mean(biases, 0))
     return cat_list, mulhot_list, bias
Esempio n. 2
0
  def _pass_sampled_items(self, prefix='item', device='/gpu:0'):
    """Copy attribute data for the currently sampled items into the
    persistent 'sampled' index variables.

    Builds and returns a list of assign/scatter ops; running them refreshes
    self.i_indices['sampled'] from the ids held in
    self.i_indices['sampled_pass'].  Also records, per multi-hot feature, a
    variable holding the flattened index length in self.sampled_mulhot_l so
    later consumers can slice only the valid prefix.

    NOTE(review): tf.get_variable is called with a fixed name per feature,
    so invoking this method twice in the same variable scope would raise
    unless reuse is enabled -- presumably it runs once at graph build time;
    confirm with callers.
    """
    self.sampled_mulhot_l = []
    res = []
    var_s = self.i_indices['sampled']      # destination variables, 4 groups
    att = self.item_attributes
    inds = self.i_indices['sampled_pass']  # ids of the sampled items
    with tf.device(device):
      # Group 0: categorical features -- direct gather + whole-tensor assign.
      for i in xrange(att.num_features_cat):
        vals = lookup(self.att[prefix][0][i], inds)
        res.append(tf.assign(var_s[0][i], vals))
      # Groups 1-3: multi-hot features stored as (flat indices, segment ids,
      # per-item lengths).
      for i in xrange(att.num_features_mulhot):
        begin_ = lookup(self.att[prefix][2][i], inds)  # span starts
        size_ = lookup(self.att[prefix][3][i], inds)   # span lengths
        b = tf.unstack(begin_)
        s = tf.unstack(size_)
        mulhot_indices = batch_slice2(self.att[prefix][1][i], b, s, self.n_sampled)
        mulhot_segids = batch_segids2(s, self.n_sampled)

        # Only the first l0 rows are written; the variables are presumably
        # sized for the worst case -- confirm against their allocation.
        l0 = tf.reduce_sum(size_)
        indices = tf.range(l0)
        res.append(tf.scatter_update(var_s[1][i], indices, mulhot_indices))
        res.append(tf.scatter_update(var_s[2][i], indices, mulhot_segids))
        res.append(tf.assign(var_s[3][i], tf.reshape(tf.to_float(size_), [self.n_sampled, 1])))

        # Persist l0 in a non-trainable int variable so consumers can slice
        # the valid prefix later (see _get_embedded_sampled).
        l = tf.get_variable(name='sampled_l_mulhot_{}'.format(i), dtype=tf.int32, 
          initializer=tf.constant(0), trainable=False)
        self.sampled_mulhot_l.append(l)
        res.append(tf.assign(l, l0))
    return res
Esempio n. 3
0
  def get_prediction(self, latent, pool='full', device='/gpu:0', output_feat=1):
    '''
    Compute item logits by combining per-feature inner products between the
    user latent vector(s) and the item feature embedding tables.

    latent: a single tensor, or a list with one tensor per item feature
      (categorical features first, then multi-hot features).
    pool: 'full' scores the whole vocabulary (self.logit_size rows);
      any other value scores only the self.n_sampled pre-sampled items.
    output_feat: in prediction stage
      0: not using attributes 
      1: using attributes, use mean to combine multi-hot features
      2: using attributes, use max  to combine multi-hot features
      3: same as 2, but softmax (instead of max)
    Returns the transposed mean of the per-feature score matrices
    (mb-by-V logits).
    '''
    # compute inner product between item_hidden and {user_feature_embedding}
    # then lookup to compute logits
    with tf.device(device):
      out_layer = self.i_indices[pool]
      indices_cat, indices_mulhot, segids_mulhot, lengths_mulhot = out_layer
      innerps = []

      # With output_feat == 0 only the first categorical feature
      # contributes; all other attributes are skipped (n2 == 0).
      n1 = 1 if output_feat == 0 else self.item_attributes.num_features_cat
      n2 = 0 if output_feat == 0 else self.item_attributes.num_features_mulhot

      for i in xrange(n1):
        # item_output selects the second (output-side) embedding tables.
        item_emb_cat = self.item_embs2_cat[i] if self.item_output else self.item_embs_cat[i]
        i_biases_cat = self.i_biases2_cat[i] if self.item_output else self.i_biases_cat[i]
        u = latent[i] if isinstance(latent, list) else latent
        inds = indices_cat[i]
        innerp = tf.matmul(item_emb_cat, tf.transpose(u)) + i_biases_cat # Vf by mb
        innerps.append(lookup(innerp, inds)) # V by mb
      # Multi-hot latent entries follow the categorical ones in the list.
      offset = self.item_attributes.num_features_cat
      
      for i in xrange(n2):
        item_embs_mulhot = self.item_embs2_mulhot[i] if self.item_output else self.item_embs_mulhot[i]
        item_biases_mulhot = self.i_biases2_mulhot[i] if self.item_output else self.i_biases_mulhot[i]
        u = latent[i+offset] if isinstance(latent, list) else latent
        lengs = lengths_mulhot[i]
        if pool == 'full':
          inds = indices_mulhot[i]
          segids = segids_mulhot[i]
          V = self.logit_size
        else:
          # Sampled pool: only the first sampled_mulhot_l[i] entries of the
          # flattened index/segid variables are valid
          # (filled by _pass_sampled_items).
          inds = tf.slice(indices_mulhot[i], [0], [self.sampled_mulhot_l[i]])
          segids = tf.slice(segids_mulhot[i], [0], [self.sampled_mulhot_l[i]])
          V = self.n_sampled
        innerp = tf.add(tf.matmul(item_embs_mulhot, tf.transpose(u)), 
          item_biases_mulhot)

        if output_feat == 1:
          # Mean of each item's multi-hot scores (segment sum / length).
          innerps.append(tf.div(tf.unsorted_segment_sum(lookup(innerp, 
            inds), segids, V), lengs))
        elif output_feat == 2:
          # NOTE(review): tf.segment_max requires sorted segment ids and
          # ignores V, unlike the unsorted_segment_* used in the other
          # branches -- confirm segids are sorted for both pools.
          innerps.append(tf.segment_max(lookup(innerp, inds), segids))  
        elif output_feat == 3:
          # Stabilized log-sum-exp: subtract the global max before exp.
          score_max = tf.reduce_max(innerp)
          innerp = tf.subtract(innerp, score_max)
          innerps.append(score_max + tf.log(1 + tf.unsorted_segment_sum(tf.exp(
            lookup(innerp, inds)), segids, V)))
        else:
          print('Error: Attribute combination not implemented!')
          exit(1)

      # Average per-feature score matrices (each V by mb), then transpose.
      logits = tf.transpose(tf.reduce_mean(innerps, 0))
    return logits
Esempio n. 4
0
  def _get_embedded2(self, embs_cat, embs_mulhot, b_cat, b_mulhot, 
    inds, mb, attributes, prefix='', concatenation=True, no_id=False, 
    device='/gpu:0'):
    """Look up minibatch embeddings, slicing multi-hot spans row by row.

    Returns (concatenated_embedding, bias) when `concatenation` is True,
    otherwise (cat_list, mulhot_list, bias).  bias is None when no bias
    tables are given.
    """
    cat_list, mulhot_list = [], []
    cat_biases, mulhot_biases = [], []
    with tf.device(device):
      # Degenerate case: the only categorical feature is the id and it is
      # suppressed -- emit a zero embedding of the matching width.
      if no_id and attributes.num_features_cat == 1:
        if b_cat is not None or b_mulhot is not None:
          print('error: not implemented')
          exit()
        width = attributes._embedding_size_list_cat[0]
        zeros = tf.zeros([mb, width], dtype=tf.float32)
        if concatenation:
          return zeros, None
        return [zeros], [], None

      for f in xrange(attributes.num_features_cat):
        if no_id and f == 0:
          continue  # skip the id feature
        feat_inds = lookup(self.att[prefix][0][f], inds)
        cat_list.append(lookup(embs_cat[f], feat_inds,
          name='emb_lookup_item_{0}'.format(f)))
        if b_cat is not None:
          cat_biases.append(lookup(b_cat[f], feat_inds,
            name='emb_lookup_item_b_{0}'.format(f)))
      for f in xrange(attributes.num_features_mulhot):
        starts = tf.unstack(lookup(self.att[prefix][2][f], inds))
        counts = tf.unstack(lookup(self.att[prefix][3][f], inds))
        rows, bias_rows = [], []
        for j in xrange(mb):
          span = tf.slice(self.att[prefix][1][f], [starts[j]], [counts[j]])
          rows.append(tf.reduce_mean(lookup(embs_mulhot[f], span), 0))
          if b_mulhot is not None:
            bias_rows.append(tf.reduce_mean(lookup(b_mulhot[f], span), 0))
        mulhot_list.append(tf.stack(rows))
        if b_mulhot is not None:
          mulhot_biases.append(tf.stack(bias_rows))

      if b_cat is None and b_mulhot is None:
        bias = None
      else:
        bias = tf.squeeze(tf.reduce_mean(cat_biases + mulhot_biases, 0))

      if concatenation:
        return concat_versions(1, cat_list + mulhot_list), bias
      return cat_list, mulhot_list, bias
Esempio n. 5
0
    def _get_embedded(self, embs_cat, embs_mulhot, b_cat, b_mulhot, inds,
                      mb, attributes, prefix='', concatenation=True,
                      no_id=False, no_attribute=False, device='/gpu:0'):
        """Vectorized embedding lookup for a minibatch.

        Multi-hot features are flattened with batch_slice2/batch_segids2
        and averaged through an unsorted segment sum, avoiding a per-row
        Python slice loop.  Returns (concatenated_embedding, bias) when
        `concatenation` is True, otherwise (cat_list, mulhot_list, bias);
        bias is None when no bias tables are given.
        """
        cat_list, mulhot_list = [], []
        cat_biases, mulhot_biases = [], []
        with tf.device(device):
            # Degenerate case: the only categorical feature is the id and
            # it is suppressed -- emit a zero embedding of matching width.
            if no_id and attributes.num_features_cat == 1:
                if b_cat is not None or b_mulhot is not None:
                    print('error: not implemented')
                    exit()
                width = attributes._embedding_size_list_cat[0]
                zeros = tf.zeros([mb, width], dtype=tf.float32)
                if concatenation:
                    return zeros, None
                return [zeros], [], None

            # With no_attribute, embed only the first categorical table and
            # skip multi-hot features entirely.
            n_cat = 1 if no_attribute else attributes.num_features_cat
            n_mulhot = 0 if no_attribute else attributes.num_features_mulhot

            for f in xrange(n_cat):
                if no_id and f == 0:
                    continue  # skip the id feature
                feat_inds = lookup(self.att[prefix][0][f], inds)
                cat_list.append(
                    lookup(embs_cat[f], feat_inds,
                           name='emb_lookup_item_{0}'.format(f)))
                if b_cat is not None:
                    cat_biases.append(
                        lookup(b_cat[f], feat_inds,
                               name='emb_lookup_item_b_{0}'.format(f)))

            for f in xrange(n_mulhot):
                begins = lookup(self.att[prefix][2][f], inds)
                sizes = lookup(self.att[prefix][3][f], inds)
                starts = tf.unstack(begins)
                counts = tf.unstack(sizes)
                flat_inds = batch_slice2(self.att[prefix][1][f], starts,
                                         counts, mb)
                segids = batch_segids2(counts, mb)
                summed = tf.unsorted_segment_sum(
                    lookup(embs_mulhot[f], flat_inds), segids, mb)
                lengths = tf.reshape(tf.to_float(sizes), [mb, 1])
                mulhot_list.append(tf.div(summed, lengths))
                if b_mulhot is not None:
                    b_summed = tf.unsorted_segment_sum(
                        lookup(b_mulhot[f], flat_inds), segids, mb)
                    mulhot_biases.append(tf.div(b_summed, lengths))

            if b_cat is None and b_mulhot is None:
                bias = None
            else:
                bias = tf.squeeze(
                    tf.reduce_mean(cat_biases + mulhot_biases, 0))

            if concatenation:
                return concat_versions(1, cat_list + mulhot_list), bias
            return cat_list, mulhot_list, bias