Example #1
    def get_product_scores(self, user_idxs, query_word_idx, product_idxs=None, scope=None):
        with variable_scope.variable_scope(scope or "embedding_graph"):
            query_vec, query_embs = get_query_embedding(self, query_word_idx, self.word_emb, True)
            user_vec, _ = get_user_vec(self, query_vec, True)

            # get candidate product embedding [None, embed_size]
            product_vec = None
            product_bias = None
            if product_idxs is not None:
                product_vec = tf.nn.embedding_lookup(self.product_emb, product_idxs)
                product_bias = tf.nn.embedding_lookup(self.product_bias, product_idxs)
            else:
                product_vec = self.product_emb
                product_bias = self.product_bias

            print('Similarity Function : ' + self.similarity_func)

            if self.similarity_func == 'product':
                return tf.matmul((1.0 - self.Wu) * user_vec + self.Wu * query_vec, product_vec, transpose_b=True)
            elif self.similarity_func == 'bias_product':
                return tf.matmul((1.0 - self.Wu) * user_vec + self.Wu * query_vec, product_vec, transpose_b=True) + product_bias
            else:
                user_vec = user_vec / tf.sqrt(tf.reduce_sum(tf.square(user_vec), 1, keep_dims=True))
                query_vec = query_vec / tf.sqrt(tf.reduce_sum(tf.square(query_vec), 1, keep_dims=True))
                product_vec = product_vec / tf.sqrt(tf.reduce_sum(tf.square(product_vec), 1, keep_dims=True))
                return tf.matmul((1.0 - self.Wu) * user_vec + self.Wu * query_vec, product_vec, transpose_b=True)
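The three branches differ only in how scores are computed from the mixed user/query vector. Below is a minimal NumPy sketch of the same scoring logic; the function name and shapes are illustrative, not the project's API. Note that this snippet normalizes the user and query vectors separately before mixing, while Example #5 normalizes the mixed vector.

import numpy as np

def product_scores(user_vec, query_vec, product_vec, product_bias, Wu,
                   similarity_func='product'):
    """NumPy stand-in for the scoring branches above."""
    # Convex combination of the user and query representations.
    example_vec = (1.0 - Wu) * user_vec + Wu * query_vec
    if similarity_func == 'product':            # raw inner product
        return example_vec @ product_vec.T
    elif similarity_func == 'bias_product':     # inner product + per-product bias
        return example_vec @ product_vec.T + product_bias
    else:                                       # cosine-style scoring
        def l2norm(x):
            return x / np.sqrt((x ** 2).sum(axis=1, keepdims=True))
        mixed = (1.0 - Wu) * l2norm(user_vec) + Wu * l2norm(query_vec)
        return mixed @ l2norm(product_vec).T

rng = np.random.default_rng(0)
batch, d, n_products = 2, 4, 5
scores = product_scores(rng.normal(size=(batch, d)),
                        rng.normal(size=(batch, d)),
                        rng.normal(size=(n_products, d)),
                        np.zeros(n_products), Wu=0.5,
                        similarity_func='bias_product')
print(scores.shape)  # (2, 5): one row of product scores per example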
Example #2
    def _build_embedding_graph_and_loss(self, scope=None):
        with variable_scope.variable_scope(scope or "embedding_graph"):
            # define all variables
            init_width = 0.5 / self.embed_size
            self.word_emb = tf.Variable(
                tf.random_uniform([self.vocab_size + 1, self.embed_size],
                                  -init_width, init_width),
                name="word_emb")
            self.word_bias = tf.Variable(tf.zeros([self.vocab_size + 1]),
                                         name="word_b")
            # user/product embeddings.
            self.user_emb = tf.Variable(
                tf.zeros([self.user_size, self.embed_size]), name="user_emb")
            self.user_bias = tf.Variable(tf.zeros([self.user_size]),
                                         name="user_b")
            self.product_emb = tf.Variable(
                tf.zeros([self.product_size, self.embed_size]),
                name="product_emb")
            self.product_bias = tf.Variable(tf.zeros([self.product_size]),
                                            name="product_b")

            # define computation graph
            batch_size = array_ops.shape(self.word_idxs)[0]
            loss = None
            regularization_terms = []

            # predict words loss
            uw_loss_tensor, uw_embs = single_nce_loss(
                self, self.user_idxs, self.user_emb, self.word_idxs,
                self.word_emb, self.word_bias, self.vocab_size,
                self.vocab_distribute)
            pw_loss_tensor, pw_embs = single_nce_loss(
                self, self.product_idxs, self.product_emb, self.word_idxs,
                self.word_emb, self.word_bias, self.vocab_size,
                self.vocab_distribute)
            loss = tf.reduce_sum(uw_loss_tensor + pw_loss_tensor)
            regularization_terms += uw_embs + pw_embs

            # pair search loss
            query_vec, qw_embs = get_query_embedding(self,
                                                     self.query_word_idxs,
                                                     self.word_emb, None)
            regularization_terms += qw_embs

            uqr_loss_tensor, uqr_embs = pair_search_loss(
                self, self.Wu, query_vec, self.user_idxs, self.user_emb,
                self.product_idxs, self.product_emb, self.product_bias,
                self.product_size, self.product_distribute)
            regularization_terms += uqr_embs
            loss += tf.reduce_sum(uqr_loss_tensor)

            # regularizer loss
            if self.L2_lambda > 0:
                l2_loss = tf.nn.l2_loss(regularization_terms[0])
                for i in range(1, len(regularization_terms)):
                    l2_loss += tf.nn.l2_loss(regularization_terms[i])
                loss += self.L2_lambda * l2_loss

            return loss / math_ops.cast(batch_size, tf.float32)
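The word table above uses the word2vec-style uniform initialization with width 0.5 / embed_size, while user/product tables start at zero. A standalone sketch of that convention (TF 2.x API, illustrative names; the extra row is presumably a padding/OOV slot):

import tensorflow as tf

vocab_size, embed_size = 1000, 64
init_width = 0.5 / embed_size  # word2vec-style uniform init, as above
word_emb = tf.Variable(
    tf.random.uniform([vocab_size + 1, embed_size], -init_width, init_width),
    name="word_emb")
print(word_emb.shape)  # (1001, 64)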
Example #3
    def _build_embedding_graph_and_loss(self, scope=None):
        with variable_scope.variable_scope(scope or "embedding_graph"):
            # Word embeddings.
            init_width = 0.5 / self.embed_size
            self.word_emb = tf.Variable(
                tf.random_uniform([self.vocab_size + 1, self.embed_size],
                                  -init_width, init_width),
                name="word_emb")
            self.word_bias = tf.Variable(tf.zeros([self.vocab_size + 1]),
                                         name="word_b")

            # user/product embeddings.
            self.user_emb = tf.Variable(
                tf.zeros([self.user_size, self.embed_size]), name="user_emb")
            self.user_bias = tf.Variable(tf.zeros([self.user_size]),
                                         name="user_b")
            self.product_emb = tf.Variable(
                tf.zeros([self.product_size + 1, self.embed_size]),
                name="product_emb")
            self.product_bias = tf.Variable(tf.zeros([self.product_size + 1]),
                                            name="product_b")

            loss = None
            regularization_terms = []
            pr_loss_tensor, pr_embs = single_nce_loss(
                self, self.product_idxs, self.product_emb, self.word_idxs,
                self.word_emb, self.word_bias, self.vocab_size,
                self.vocab_distribute)
            loss = tf.reduce_sum(pr_loss_tensor)
            regularization_terms += pr_embs
            #self.print_ops.append(tf.print("product_loss: ", pr_loss, output_stream=sys.stdout))

            # get query_vec
            query_vec, qw_embs = get_query_embedding(self, self.query_word_idxs, self.word_emb, None)
            regularization_terms += qw_embs

            # get user_vec by looking history product list
            user_vec, r_terms = get_user_vec(self, query_vec)
            regularization_terms += r_terms
            regularization_terms.append(user_vec)

            # compute the pair search loss
            combined_vec = user_vec * (1 - self.Wu) + query_vec * self.Wu
            uqr_loss_tensor, uqr_embs = single_nce_loss_with_vec(
                self, combined_vec, self.product_idxs, self.product_emb,
                self.product_bias, self.product_size, self.product_distribute)
            loss += tf.reduce_sum(uqr_loss_tensor)
            regularization_terms += uqr_embs

            # L2 regularization
            if self.L2_lambda > 0:
                l2_loss = tf.nn.l2_loss(regularization_terms[0])
                for i in range(1, len(regularization_terms)):
                    l2_loss += tf.nn.l2_loss(regularization_terms[i])
                loss += self.L2_lambda * l2_loss

            batch_size = array_ops.shape(self.word_idxs)[0]  # get batch_size

            return loss / math_ops.cast(batch_size, tf.float32)
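The explicit index loop over regularization_terms (here and in Examples #2 and #7) can be written in a single op with tf.add_n. A self-contained sketch with stand-in tensors (TF 2.x eager; under TF 1.x this would run inside a session):

import tensorflow as tf

# Stand-in tensors playing the role of regularization_terms.
regularization_terms = [tf.ones([3, 4]), tf.ones([2]), tf.ones([5, 2])]
L2_lambda = 0.005

# tf.nn.l2_loss(t) computes sum(t ** 2) / 2; tf.add_n sums the scalars.
l2_loss = tf.add_n([tf.nn.l2_loss(t) for t in regularization_terms])
penalty = L2_lambda * l2_loss
print(penalty)  # scalar L2 penalty to add to the training loss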
Example #4
File: lse.py Project: PTYin/ESRT
    def get_product_scores(self,
                           user_idxs,
                           query_word_idx,
                           product_idxs=None,
                           scope=None):
        """
        Args:
            user_idxs: Tensor with shape of [batch_size] with type of int32.
            query_word_idx: Tensor with shape for [batch_size, query_max_length] with type of int32.
            product_idxs: Tensor with shape of [batch_size] with type of int32 or None.
            scope:

        Return:
            product_scores: Tensor with shape of [batch_size, batch_size] or [batch_size, len(product_vocab)]
                            with type of float32. its (i, j) entry is the score of j product retrieval by i
                            example(which is a linear combination of user and query).


        """

        with variable_scope.variable_scope(scope or "LSE_graph"):
            # get query vector
            query_vec, word_vecs = get_query_embedding(self, query_word_idx,
                                                       self.word_emb, True)
            # match with product
            product_vec = None
            product_bias = None
            if product_idxs is not None:
                product_vec = tf.nn.embedding_lookup(self.product_emb,
                                                     product_idxs)
                product_bias = tf.nn.embedding_lookup(self.product_bias,
                                                      product_idxs)
            else:
                product_vec = self.product_emb
                product_bias = self.product_bias

            print('Similarity Function : ' + self.similarity_func)

            if self.similarity_func == 'product':
                return tf.matmul(query_vec, product_vec, transpose_b=True)
            elif self.similarity_func == 'bias_product':
                return tf.matmul(query_vec, product_vec,
                                 transpose_b=True) + product_bias
            else:
                query_norm = tf.sqrt(
                    tf.reduce_sum(tf.square(query_vec), 1, keep_dims=True))
                product_norm = tf.sqrt(
                    tf.reduce_sum(tf.square(product_vec), 1, keep_dims=True))
                return tf.matmul(query_vec / query_norm,
                                 product_vec / product_norm,
                                 transpose_b=True)
Example #5
    def get_product_scores(self,
                           user_idxs,
                           query_word_idx,
                           product_idxs=None,
                           scope=None):
        with variable_scope.variable_scope(scope or "embedding_graph"):
            # get user embedding [None, embed_size]
            user_vec = tf.nn.embedding_lookup(
                self.entity_dict['user']['embedding'], user_idxs)
            # get query embedding [None, embed_size]
            if self.dynamic_weight >= 0.0:
                print('Query as a dynamic relationship')
                query_vec, query_embs = get_query_embedding(
                    self, query_word_idx,
                    self.entity_dict['word']['embedding'], True)
            else:
                print('Query as a static relationship')
                query_vec = self.query_static_vec

            # get candidate product embedding [None, embed_size]
            product_vec = None
            product_bias = None
            if product_idxs is not None:
                product_vec = tf.nn.embedding_lookup(
                    self.entity_dict['product']['embedding'], product_idxs)
                product_bias = tf.nn.embedding_lookup(self.product_bias,
                                                      product_idxs)
            else:
                product_vec = self.entity_dict['product']['embedding']
                product_bias = self.product_bias

            print('Similarity Function : ' + self.similarity_func)
            example_vec = (1.0 - self.Wu) * user_vec + self.Wu * query_vec
            #example_vec = user_vec + query_vec

            if self.similarity_func == 'product':
                return tf.matmul(example_vec, product_vec,
                                 transpose_b=True), example_vec
            elif self.similarity_func == 'bias_product':
                return tf.matmul(example_vec, product_vec,
                                 transpose_b=True) + product_bias, example_vec
            else:
                norm_vec = example_vec / tf.sqrt(
                    tf.reduce_sum(tf.square(example_vec), 1, keep_dims=True))
                product_vec = product_vec / tf.sqrt(
                    tf.reduce_sum(tf.square(product_vec), 1, keep_dims=True))
                return tf.matmul(norm_vec, product_vec,
                                 transpose_b=True), example_vec
Example #6
File: lse.py Project: PTYin/ESRT
    def LSE_nce_loss(self, user_idxs, product_idxs, word_idxs,
                     context_word_idxs):
        batch_size = array_ops.shape(word_idxs)[0]  # get batch_size
        loss = None

        # get f(s)
        word_idx_list = tf.stack([word_idxs] + context_word_idxs, 1)
        f_s, [f_W, word_vecs] = get_query_embedding(self, word_idx_list,
                                                    self.word_emb, None)

        # Negative sampling
        loss, true_w, sample_w = self.LSE_single_nce_loss(
            f_s, product_idxs, self.product_emb, self.product_bias,
            self.product_size, self.product_distribute)

        # L2 regularization
        if self.L2_lambda > 0:
            loss += self.L2_lambda * (
                tf.nn.l2_loss(true_w) + tf.nn.l2_loss(sample_w) +
                tf.nn.l2_loss(f_W) + tf.nn.l2_loss(word_vecs))

        return loss / math_ops.cast(batch_size, dtypes.float32)
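The single_nce_loss / LSE_single_nce_loss helpers are project code not shown on this page; they presumably sample negatives from product_distribute. For orientation, here is a standalone sketch of the same negative-sampling objective using the stock tf.nn.nce_loss, a generic stand-in rather than the project's implementation (TF 2.x eager; all names and sizes are illustrative):

import tensorflow as tf

num_products, embed_size, batch_size, num_sampled = 1000, 16, 8, 64

product_emb = tf.Variable(
    tf.random.uniform([num_products, embed_size], -0.1, 0.1))
product_bias = tf.Variable(tf.zeros([num_products]))

f_s = tf.random.normal([batch_size, embed_size])    # stand-in for f(s)
labels = tf.random.uniform([batch_size, 1], maxval=num_products,
                           dtype=tf.int64)          # positive product ids

# NCE: score each true product against num_sampled sampled negatives.
nce = tf.nn.nce_loss(weights=product_emb, biases=product_bias,
                     labels=labels, inputs=f_s,
                     num_sampled=num_sampled, num_classes=num_products)
loss = tf.reduce_sum(nce) / tf.cast(batch_size, tf.float32)
print(loss)  # per-example NCE loss averaged over the batch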
Example #7
    def _build_embedding_graph_and_loss(self, scope=None):
        # decide which relation we want to use
        self.use_relation_dict = {
            'also_bought': False,
            'also_viewed': False,
            'bought_together': False,
            'brand': False,
            'categories': False,
        }
        if 'none' in self.net_struct:
            print('Use no relation')
        else:
            need_relation_list = []
            for key in self.use_relation_dict:
                if key in self.net_struct:
                    self.use_relation_dict[key] = True
                    need_relation_list.append(key)
            if len(need_relation_list) > 0:
                print('Use relations ' + ' '.join(need_relation_list))
            else:
                print('Use all relations')
                for key in self.use_relation_dict:
                    self.use_relation_dict[key] = True

        # build graph
        with variable_scope.variable_scope(scope or "embedding_graph"):
            loss = None
            regularization_terms = []
            batch_size = array_ops.shape(self.user_idxs)[0]  # get batch_size
            # user + query -> product
            query_vec = None
            if self.dynamic_weight >= 0.0:
                print('Treat query as a dynamic relationship')
                query_vec, qw_embs = get_query_embedding(
                    self, self.query_word_idxs,
                    self.entity_dict['word']['embedding'],
                    None)  # get query vector
                regularization_terms.extend(qw_embs)
            else:
                print('Treat query as a static relationship')
                init_width = 0.5 / self.embed_size
                self.query_static_vec = tf.Variable(tf.random_uniform(
                    [self.embed_size], -init_width, init_width),
                                                    name="query_emb")
                query_vec = self.query_static_vec
                regularization_terms.extend([query_vec])
            self.product_bias = tf.Variable(tf.zeros(
                [self.entity_dict['product']['size'] + 1]),
                                            name="product_b")
            uqr_loss_tensor, uqr_embs = pair_search_loss(
                self,
                self.Wu,
                query_vec,
                self.user_idxs,  # product prediction loss
                self.entity_dict['user']['embedding'],
                self.product_idxs,
                self.entity_dict['product']['embedding'],
                self.product_bias,
                len(self.entity_dict['product']['vocab']),
                self._dataset.product_distribute)
            regularization_terms.extend(uqr_embs)

            dynamic_loss = tf.reduce_sum(uqr_loss_tensor)
            #self.print_ops.append(tf.print('dynamic_loss: ', dynamic_loss, '\n'))

            # user + write -> word
            uw_loss_tensor, uw_embs = relation_nce_loss(
                self, 0.5, self.user_idxs, 'user', 'word', 'word')
            regularization_terms.extend(uw_embs)
            #self.print_ops.append(tf.print('uw_loss: ', tf.reduce_sum(uw_loss_tensor), '\n'))

            static_loss = tf.reduce_sum(uw_loss_tensor)

            # product + write -> word
            pw_loss_tensor, pw_embs = relation_nce_loss(
                self, 0.5, self.product_idxs, 'product', 'word', 'word')
            regularization_terms.extend(pw_embs)
            #self.print_ops.append(tf.print('pw_loss: ', tf.reduce_sum(pw_loss_tensor), '\n'))
            static_loss += tf.reduce_sum(pw_loss_tensor)

            # product + also_bought -> product
            if self.use_relation_dict['also_bought']:
                pab_loss_tensor, pab_embs = relation_nce_loss(
                    self, 0.5, self.product_idxs, 'product', 'also_bought',
                    'related_product')
                regularization_terms.extend(pab_embs)
                #self.print_ops.append(tf.print('pab_loss: ', tf.reduce_sum(pab_loss_tensor), '\n'))
                static_loss += tf.reduce_sum(pab_loss_tensor)

            # product + also_viewed -> product
            if self.use_relation_dict['also_viewed']:
                pav_loss_tensor, pav_embs = relation_nce_loss(
                    self, 0.5, self.product_idxs, 'product', 'also_viewed',
                    'related_product')
                regularization_terms.extend(pav_embs)
                #self.print_ops.append(tf.print('pav_loss: ', tf.reduce_sum(pav_loss_tensor), '\n'))
                static_loss += tf.reduce_sum(pav_loss_tensor)

            # product + bought_together -> product
            if self.use_relation_dict['bought_together']:
                pbt_loss_tensor, pbt_embs = relation_nce_loss(
                    self, 0.5, self.product_idxs, 'product', 'bought_together',
                    'related_product')
                regularization_terms.extend(pbt_embs)
                #self.print_ops.append(tf.print('pbt_loss: ', tf.reduce_sum(pbt_loss_tensor), '\n'))
                static_loss += tf.reduce_sum(pbt_loss_tensor)

            # product + is_brand -> brand
            if self.use_relation_dict['brand']:
                pib_loss_tensor, pib_embs = relation_nce_loss(
                    self, 0.5, self.product_idxs, 'product', 'brand', 'brand')
                regularization_terms.extend(pib_embs)
                #self.print_ops.append(tf.print('pib_loss: ', tf.reduce_sum(pib_loss_tensor), '\n'))
                static_loss += tf.reduce_sum(pib_loss_tensor)

            # product + is_category -> categories
            if self.use_relation_dict['categories']:
                pic_loss_tensor, pic_embs = relation_nce_loss(
                    self, 0.5, self.product_idxs, 'product', 'categories',
                    'categories')
                regularization_terms.extend(pic_embs)
                #self.print_ops.append(tf.print('pic_loss: ', tf.reduce_sum(pic_loss_tensor), '\n'))
                static_loss += tf.reduce_sum(pic_loss_tensor)

            #self.print_ops.append(tf.print('static_loss: ', static_loss, '\n======\n'))
            # merge dynamic loss and static loss
            loss = None
            if self.dynamic_weight >= 0.0:
                print('Dynamic relation weight %.2f' % self.dynamic_weight)
                loss = 2 * (self.dynamic_weight * dynamic_loss +
                            (1 - self.dynamic_weight) * static_loss)
            else:
                # consider query as a static relation
                loss = dynamic_loss + static_loss

            # L2 regularization
            if self.L2_lambda > 0:
                l2_loss = tf.nn.l2_loss(regularization_terms[0])
                for i in range(1, len(regularization_terms)):
                    l2_loss += tf.nn.l2_loss(regularization_terms[i])
                loss += self.L2_lambda * l2_loss

            return loss / math_ops.cast(batch_size, dtypes.float32)
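One detail worth noting: the factor of 2 in the dynamic/static merge makes dynamic_weight = 0.5 reproduce the plain sum used in the static-query branch. A quick plain-Python check:

dynamic_loss, static_loss = 3.0, 5.0
for w in (0.25, 0.5, 0.75):
    print(w, 2 * (w * dynamic_loss + (1 - w) * static_loss))
# 0.25 -> 9.0, 0.5 -> 8.0 (== dynamic_loss + static_loss), 0.75 -> 7.0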