Example #1
    def __init__(self, graph_ear, dimension=10, learning_rate=0.1, batchSize=100, margin=1,
                                regularizer_scale = 0.1, neg_rate=1, neg_rel_rate=0):
        self.entity = graph_ear.entity
        self.attribute = graph_ear.attribute
        self.relation = graph_ear.relation
        self.value = graph_ear.value
        self.dimension = dimension
        self.learning_rate = learning_rate
        self.batchSize = batchSize
        self.margin = margin
        self.neg_rate = neg_rate
        self.neg_rel_rate = neg_rel_rate

        # List of triples. Remove last incomplete batch if any.
        self.atriples = np.array(graph_ear.atriples[0: (len(graph_ear.atriples) - len(graph_ear.atriples)%batchSize)])
        self.rtriples = np.array(graph_ear.rtriples[0: (len(graph_ear.rtriples) - len(graph_ear.rtriples)%batchSize)])
        logger.info("Modified Atriples size: %d", len(self.atriples))
        logger.info("Modified Rtriples size: %d", len(self.rtriples))

        #Collect Negative Samples
        self.nrtriples = np.array(get_negative_samples(self.rtriples, len(self.entity),
                                    len(self.entity), len(self.relation), graph_ear.entity_pairs,
                                    neg_rate = self.neg_rate, neg_rel_rate = self.neg_rel_rate))
        self.natriples = np.array(get_negative_samples(self.atriples, len(self.entity),
                                len(self.value), len(self.attribute), [],
                                neg_rate = self.neg_rate, neg_rel_rate = self.neg_rel_rate))

        #Define Embedding Variables
        initializer = tf.contrib.layers.xavier_initializer(uniform = True)
        regularizer = tf.contrib.layers.l2_regularizer(scale = regularizer_scale)

        self.ent_embeddings = tf.get_variable(name = "ent_embeddings", shape = [len(self.entity), dimension],
                                    initializer = initializer, regularizer = regularizer)
        self.rel_embeddings = tf.get_variable(name = "rel_embeddings", shape = [len(self.relation), dimension],
                                    initializer = initializer, regularizer = regularizer)
        self.attr_embeddings = tf.get_variable(name = "attr_embeddings", shape = [len(self.attribute), dimension],
                                    initializer = initializer, regularizer = regularizer)
        self.val_embeddings = tf.get_variable(name = "val_embeddings", shape = [len(self.value), dimension],
                                    initializer = initializer, regularizer = regularizer)
        self.projection_matrix = tf.get_variable(name = "projection_matrix", shape = [len(self.attribute), dimension],
                                    initializer = initializer, regularizer = regularizer)

        #Define Placeholders for input
        self.head = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.tail = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.rel = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.neg_head = tf.placeholder(tf.int32, shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])
        self.neg_tail = tf.placeholder(tf.int32, shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])
        self.neg_rel = tf.placeholder(tf.int32, shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])

        self.attr_head = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.val = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.attr = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.neg_attr_head = tf.placeholder(tf.int32, shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])
        self.neg_val = tf.placeholder(tf.int32, shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])
        self.neg_attr = tf.placeholder(tf.int32, shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])

        #Load Embedding Vectors for Relational Triple
        pos_h = tf.nn.embedding_lookup(self.ent_embeddings, self.head)
        pos_t = tf.nn.embedding_lookup(self.ent_embeddings, self.tail)
        pos_r = tf.nn.embedding_lookup(self.rel_embeddings, self.rel)
        pos_nh = tf.nn.embedding_lookup(self.ent_embeddings, self.neg_head)
        pos_nt = tf.nn.embedding_lookup(self.ent_embeddings, self.neg_tail)
        pos_nr = tf.nn.embedding_lookup(self.rel_embeddings, self.neg_rel)

        #Normalize vectors
        pos_h = tf.nn.l2_normalize(pos_h, [1,2])
        pos_t = tf.nn.l2_normalize(pos_t, [1,2])
        pos_r = tf.nn.l2_normalize(pos_r, [1,2])
        pos_nh = tf.nn.l2_normalize(pos_nh, [1,2])
        pos_nt = tf.nn.l2_normalize(pos_nt, [1,2])
        pos_nr = tf.nn.l2_normalize(pos_nr, [1,2])

        #Load Embedding Vectors for Attributional Triple
        pos_attr_h = tf.nn.embedding_lookup(self.ent_embeddings, self.attr_head)
        pos_val = tf.nn.embedding_lookup(self.val_embeddings, self.val)
        pos_attr = tf.nn.embedding_lookup(self.attr_embeddings, self.attr)
        pos_attr_nh = tf.nn.embedding_lookup(self.ent_embeddings, self.neg_attr_head)
        pos_attr_nv = tf.nn.embedding_lookup(self.val_embeddings, self.neg_val)
        pos_attr_na = tf.nn.embedding_lookup(self.attr_embeddings, self.neg_attr)

        #Normalize vectors
        pos_attr_h = tf.nn.l2_normalize(pos_attr_h, [1,2])
        pos_val = tf.nn.l2_normalize(pos_val, [1,2])
        pos_attr = tf.nn.l2_normalize(pos_attr, [1,2])
        pos_attr_nh = tf.nn.l2_normalize(pos_attr_nh, [1,2])
        pos_attr_nv = tf.nn.l2_normalize(pos_attr_nv, [1,2])
        pos_attr_na = tf.nn.l2_normalize(pos_attr_na, [1,2])

        #Load Normal Vectors
        pos_proj = tf.nn.embedding_lookup(self.projection_matrix, self.attr)
        pos_nproj = tf.nn.embedding_lookup(self.projection_matrix, self.neg_attr)

        #Normalize vectors
        pos_proj = tf.nn.l2_normalize(pos_proj, [1,2])
        pos_nproj = tf.nn.l2_normalize(pos_nproj, [1,2])

        proj_pos_attr_h = self._transfer(pos_attr_h, pos_proj)
        proj_pos_attr_nh = self._transfer(pos_attr_nh, pos_nproj)

        #Compute Loss
        _p_score = self._calc(pos_h, pos_t, pos_r)
        _n_score = self._calc(pos_nh, pos_nt, pos_nr)

        _ap_score = self._attr_calc(proj_pos_attr_h, pos_val, pos_attr)
        _an_score = self._attr_calc(proj_pos_attr_nh, pos_attr_nv, pos_attr_na)

        p_score = tf.reduce_sum(tf.reduce_mean(_p_score, 1, keepdims=False), axis=1, keepdims=True)
        n_score = tf.reduce_sum(tf.reduce_mean(_n_score, 1, keepdims=False), axis=1, keepdims=True)
        ap_score = tf.reduce_sum(tf.reduce_mean(_ap_score, 1, keepdims=False), axis=1, keepdims=True)
        an_score = tf.reduce_sum(tf.reduce_mean(_an_score, 1, keepdims=False), axis=1, keepdims=True)
        self.rel_loss = tf.reduce_sum(tf.maximum(p_score - n_score + self.margin, 0))
        self.attr_loss = tf.reduce_sum(tf.maximum(ap_score - an_score + self.margin, 0))

        #Configure optimizer
        self.rel_optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.rel_loss)
        self.attr_optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.attr_loss)

        #Configure session
        self.sess = tf.Session()

        #Collect summary for tensorboard
        tf.summary.scalar('attr_loss', self.attr_loss, collections=['attr'])
        tf.summary.scalar('rel_loss', self.rel_loss, collections=['rel'])
        tf.summary.scalar('p_score', tf.reduce_mean(p_score), collections=['rel'])
        tf.summary.scalar('n_score', tf.reduce_mean(n_score), collections=['rel'])
        tf.summary.scalar('ap_score', tf.reduce_mean(ap_score), collections=['attr'])
        tf.summary.scalar('an_score', tf.reduce_mean(an_score), collections=['attr'])


        #Configure summary location
        self.merged_attr = tf.summary.merge_all(key='attr')
        self.merged_rel = tf.summary.merge_all(key='rel')
        self.attr_summary_writer = tf.summary.FileWriter(get_tf_summary_file_path(logger) + '_attr', self.sess.graph)
        self.rel_summary_writer = tf.summary.FileWriter(get_tf_summary_file_path(logger) +'_rel', self.sess.graph)
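Note: Example 1 relies on the helper methods self._calc, self._attr_calc, and self._transfer, which are defined elsewhere in the class. Going by the inline note in Example 5 ("can also use: tf.abs(tf.subtract(tf.add(pos_ha, pos_ra), pos_ta))"), a minimal standalone sketch of what such helpers typically compute follows; the exact signatures are assumptions, not taken from the source.

    import tensorflow as tf

    # Hypothetical sketches of the scoring and projection helpers.
    def calc(h, t, r):
        # TransE-style element-wise residual |h + r - t|; the constructor
        # reduces this over the remaining axes to get one score per triple.
        return tf.abs(h + r - t)

    def attr_calc(h, v, a):
        # Attributional analogue over projected heads: |h + a - v|.
        return tf.abs(h + a - v)

    def transfer(e, norm):
        # TransH-style projection of e onto the hyperplane with unit normal
        # vector norm: e - (e . n) n, taken over the embedding axis.
        return e - tf.reduce_sum(e * norm, -1, keepdims=True) * norm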
Example #2
    def __init__(self,
                 dataset,
                 columns,
                 dimension=10,
                 batchSize=100,
                 learning_rate=0.1,
                 margin=1,
                 regularizer_scale=0.1,
                 test_values=None):
        """
            Constructor to build the tf model, define required placeholders,
            define loss and optimization method.
        """
        logger.info("Begin generating VEER embeddings with dimension : %d",
                    dimension)

        self.dataset = dataset(
        )  #Model Class containing data (Census, Cora or FEBRL)
        self.columns = columns  #List of column names of interest
        self.dimension = dimension  #Embedding Dimension
        self.batchSize = batchSize  #BatchSize for Stochastic Gradient Decent
        self.learning_rate = learning_rate  #Learning rate for optmizer
        self.margin = margin  #margin or bias used for loss computation

        #Collect all values
        self.values = []
        for data in [
                self.dataset.trainDataA, self.dataset.trainDataB,
                self.dataset.valDataA, self.dataset.valDataB,
                self.dataset.testDataA, self.dataset.testDataB
        ]:
            for col in self.columns:
                self.values.extend(list(data[col]))
        self.values = list([self._clean(v) for v in self.values])
        if test_values:
            self.values.extend(list(test_values))
        #Default token for missing values; pass test_values to avoid falling back to it.
        self.values.append('missing_value')
        self.values = list(set(self.values))
        logger.info("No. of unique values: %d", len(self.values))

        #Define Embedding Variables
        initializer = tf.contrib.layers.xavier_initializer(uniform=True)
        regularizer = tf.contrib.layers.l2_regularizer(scale=regularizer_scale)

        self.val_embeddings = tf.get_variable(
            name="val_embeddings",
            shape=[len(self.values), dimension],
            initializer=initializer,
            regularizer=regularizer)
        self.col_weights = tf.get_variable(name="col_weights",
                                           shape=[len(self.columns)],
                                           initializer=initializer,
                                           regularizer=regularizer)

        #Define Placeholders for input
        self.record_a = tf.placeholder(
            tf.int32, shape=[self.batchSize, len(self.columns)])
        self.record_b = tf.placeholder(
            tf.int32, shape=[self.batchSize, len(self.columns)])
        self.truth_val = tf.placeholder(tf.float32, shape=[self.batchSize, 1])

        pos_a = tf.nn.embedding_lookup(self.val_embeddings, self.record_a)
        pos_b = tf.nn.embedding_lookup(self.val_embeddings, self.record_b)
        pos_col_wt = tf.nn.embedding_lookup(self.col_weights,
                                            range(len(self.columns)))

        logger.info("Shape of pos a :%s", str(pos_a.shape))
        logger.info("Shape of pos b :%s", str(pos_b.shape))
        logger.info("Shape of pos col wt :%s", str(pos_col_wt.shape))

        #Normalize embeddings
        pos_a = tf.nn.l2_normalize(pos_a, 2)
        pos_b = tf.nn.l2_normalize(pos_b, 2)
        pos_col_wt = tf.nn.l2_normalize(pos_col_wt)

        #Compute loss and prediction
        self.score = tf.matmul(
            tf.reduce_mean(tf.abs(pos_a - pos_b), 2, keepdims=False),
            tf.expand_dims(pos_col_wt, 1))
        logger.info("Shape of score: %s", str(self.score.shape))
        self.predict = tf.sigmoid(self.score)
        logger.info("Shape of predict: %s", str(self.predict.shape))
        _loss = tf.maximum(self.margin + (self.score * self.truth_val), 0)
        logger.info("Shape of _loss: %s", str(_loss.shape))
        self.loss = tf.reduce_sum(tf.reduce_mean(_loss, 1, keepdims=False),
                                  keepdims=True)
        logger.info("Shape of loss: %s", str(self.loss.shape))

        logger.info("Aggregated loss shape:%s", str(self.loss[0].shape))

        #collect summary parameters
        tf.summary.scalar('loss', self.loss[0])

        #Configure Performance measures
        int_truth_val = tf.cast((self.truth_val + 1) / 2, tf.int64)
        int_predict_val = tf.cast(self.predict, tf.int64)

        self.accuracy = tf.contrib.metrics.accuracy(int_predict_val,
                                                    int_truth_val)
        tf.summary.scalar('Accuracy', self.accuracy)
        """
        #Todo: F-Score is always 0, fix it.
        self.f1_score, _ = tf.contrib.metrics.f1_score(int_truth_val, self.predict[:,0])
        tf.summary.scalar('F-Score', self.f1_score)

        #Todo: MAP@1 is not as per the queries we expect based on match / non-match class.
        int_predict_val = tf.reshape(tf.stack([self.predict, 1 - self.predict], 1),
                                (self.batchSize, 2))
        logger.info("Shape of int_predict_val: %s", str(int_predict_val.shape))
        self.map1, _ = tf.metrics.average_precision_at_k(int_truth_val, int_predict_val, 1)
        self.map10, _ = tf.metrics.average_precision_at_k(int_truth_val, int_predict_val, 2)
        tf.summary.scalar('MAP@1', self.map1)
        tf.summary.scalar('MAP@10', self.map10)
        """

        #Configure optimizer
        self.optimizer = tf.train.GradientDescentOptimizer(
            self.learning_rate).minimize(self.loss)

        #Configure session
        self.sess = tf.Session()

        #Configure summary location
        self.merged = tf.summary.merge_all()
        self.summary_writer = tf.summary.FileWriter(
            get_tf_summary_file_path(logger) + '/train', self.sess.graph)
        self.validation_summary_writer = tf.summary.FileWriter(
            get_tf_summary_file_path(logger) + '/val', self.sess.graph)

        #Configure Saver
        self.saver = tf.train.Saver()
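Note: the constructor above passes every raw value through self._clean before building the vocabulary, but the helper is not shown. A plausible minimal sketch (an assumption; the real normalization may differ):

    # Hypothetical sketch of the _clean helper: normalize a raw cell value.
    def clean(value):
        if value is None:
            return 'missing_value'  # same default token the constructor appends
        value = str(value).strip().lower()
        return value if value else 'missing_value'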
Example #3
    def __init__(self, model, columns, entity, ent_embeddings, rel_embedding=None,
                                    batchSize=100, learning_rate=0.1,
                                    margin=1, regularizer_scale = 0.1):
        logger.info("Initializing WERL to learn weights")
        self.dataset = model()
        self.columns = columns
        self.entity = entity
        self.ent_embeddings = ent_embeddings
        self.dimension = len(ent_embeddings[0])
        self.batchSize = batchSize
        self.learning_rate = learning_rate
        self.margin = margin
        self.regularizer_scale = regularizer_scale

        self.map_ent_to_embedding = {entity[i] : ent_embeddings[i] for i in range(len(entity))}
        self.zero_vector = [0] * self.dimension
        self.default_vector_a = [1] * self.dimension
        self.default_vector_b = [-1] * self.dimension
        if rel_embedding is None:
            self.rel_embedding = np.zeros((len(self.columns), self.dimension))
        else:
            self.rel_embedding = rel_embedding
        self.get_embed = (lambda x: self.map_ent_to_embedding[x]
                                if x in self.map_ent_to_embedding
                                else self.zero_vector) #np.random.randn(self.dimension))

        self.get_embed_default = (lambda x, d: self.map_ent_to_embedding[x]
                                if x in self.map_ent_to_embedding
                                else d)
                                #self.zero_vector)

        #Define Trainable Weights for each feature
        initializer = tf.contrib.layers.xavier_initializer(uniform = True)
        regularizer = tf.contrib.layers.l2_regularizer(scale = regularizer_scale)
        self.weights = tf.get_variable(name = "weights", shape = [len(self.columns), 1], #self.dimension],
                                    initializer = initializer, regularizer = regularizer)

        #Define Placeholders for input
        self.record_a = tf.placeholder(tf.float32, shape=[self.batchSize, len(columns), self.dimension])
        self.record_b = tf.placeholder(tf.float32, shape=[self.batchSize, len(columns), self.dimension])
        self.truth_val = tf.placeholder(tf.float32, shape=[self.batchSize, 1])
        self.same_val = tf.placeholder(tf.float32, shape=[self.batchSize, len(columns)])
        self.norm_weights = tf.nn.l2_normalize(self.weights)

        long_val = tf.tile(tf.expand_dims(self.same_val, 2), [1, 1, self.dimension])
        logger.info("long_val %s", str(long_val.shape))

        self.score_merl = tf.reduce_sum(tf.math.multiply(
                        tf.abs(self.record_a - self.record_b + self.rel_embedding),
                        long_val), 2, keepdims=False) / len(self.columns)
        self.score = tf.matmul(self.score_merl, self.norm_weights)
        self.predict = tf.sigmoid(self.score) #/ len(self.columns))
        self.predict_merl = tf.sigmoid(self.score_merl) #/ len(self.columns))
        _loss = tf.maximum(0.0, self.margin + (self.score * self.truth_val))
        self.loss = tf.reduce_sum(tf.reduce_mean(_loss, 1, keepdims = False), keepdims = True)
        #_loss = tf.reduce_sum(tf.reduce_mean((self.score * self.truth_val), 1, keepdims = False), keepdims = True)
        #self.loss = tf.maximum(0.0, margin + _loss)
        #collect summary parameters
        tf.summary.scalar('loss', self.loss[0])

        #Configure Performance measures
        int_truth_val = tf.cast((self.truth_val+1)/2, tf.int64)
        int_predict_val = tf.cast(self.predict, tf.int64)

        self.accuracy = tf.contrib.metrics.accuracy(int_predict_val, int_truth_val)
        tf.summary.scalar('Accuracy', self.accuracy)

        #Configure optimizer
        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss)

        #Configure session
        self.sess = tf.Session()

        #Configure summary location
        self.merged = tf.summary.merge_all()
        self.summary_writer = tf.summary.FileWriter(get_tf_summary_file_path(logger) + '/train', self.sess.graph)
        self.validation_summary_writer = tf.summary.FileWriter(get_tf_summary_file_path(logger) + '/val',
                                    self.sess.graph)

        #Configure Saver
        self.saver = tf.train.Saver()
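Note: the WERL constructor only builds the graph. For reference, a hedged sketch of a single training step feeding the placeholders defined above (the batch arrays and the {-1, +1} label convention are assumptions inferred from int_truth_val = (truth_val + 1) / 2):

    # Hypothetical training step for the WERL model above.
    def train_step(model, batch_a, batch_b, batch_same, batch_truth):
        feed = {
            model.record_a: batch_a,       # [batchSize, len(columns), dimension]
            model.record_b: batch_b,       # [batchSize, len(columns), dimension]
            model.same_val: batch_same,    # [batchSize, len(columns)]
            model.truth_val: batch_truth,  # [batchSize, 1], labels in {-1, +1}
        }
        _, loss = model.sess.run([model.optimizer, model.loss], feed_dict=feed)
        return loss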
Example #4
    def __init__(self, graph_er, dimension=10, batchSize=100,
                    learning_rate=0.1, margin=1, regularizer_scale = 0.1, neg_rate=1, neg_rel_rate=0):
        logger.info("Begin generating TransH embeddings with dimension : %d" ,dimension)

        self.dimension = dimension #Embedding Dimension
        self.batchSize = batchSize #BatchSize for Stochastic Gradient Decent
        self.learning_rate = learning_rate #Learning rate for optmizer
        self.margin = margin #margin or bias used for loss computation
        self.entity = graph_er.entity #List of entities in Knowledge graph
        self.relation = graph_er.relation #List of relationships in Knowledge Graph
        self.neg_rate = neg_rate #Number of Negative samples to generate by replacing head or tail
        self.neg_rel_rate = neg_rel_rate #Number fo negative samples by replacing realtion

        # List of triples. Remove last incomplete batch if any.
        self.triples = np.array(graph_er.triples[0: (len(graph_er.triples) - len(graph_er.triples)%batchSize)])
        self.ntriples = np.array(get_negative_samples(self.triples, len(self.entity),
                                        len(self.entity), len(self.relation), graph_er.entity_pairs,
                                        neg_rate=neg_rate, neg_rel_rate=neg_rel_rate))
        logger.info("Shape of triples: %s", str(self.triples.shape))
        logger.info("Shape of neg triples: %s", str(self.ntriples.shape))

        #Define Embedding Variables
        initializer = tf.contrib.layers.xavier_initializer(uniform = True)
        regularizer = tf.contrib.layers.l2_regularizer(scale = regularizer_scale)

        self.ent_embeddings = tf.get_variable(name = "ent_embeddings", shape = [len(self.entity), dimension],
                                    initializer = initializer, regularizer = regularizer)
        self.rel_embeddings = tf.get_variable(name = "rel_embeddings", shape = [len(self.relation), dimension],
                                    initializer = initializer, regularizer = regularizer)
        self.norm_vector = tf.get_variable(name = "norm_vector", shape = [len(self.relation), dimension],
                                    initializer = initializer, regularizer = regularizer)

        #Define Placeholders for input
        self.head = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.tail = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.rel = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.neg_head = tf.placeholder(tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])
        self.neg_tail= tf.placeholder(tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])
        self.neg_rel= tf.placeholder(tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])

        #Load Embedding Vectors
        pos_h = tf.nn.embedding_lookup(self.ent_embeddings, self.head)
        pos_t = tf.nn.embedding_lookup(self.ent_embeddings, self.tail)
        pos_r = tf.nn.embedding_lookup(self.rel_embeddings, self.rel)
        pos_nh = tf.nn.embedding_lookup(self.ent_embeddings, self.neg_head)
        pos_nt = tf.nn.embedding_lookup(self.ent_embeddings, self.neg_tail)
        pos_nr = tf.nn.embedding_lookup(self.rel_embeddings, self.neg_rel)
        pos_norm = tf.nn.embedding_lookup(self.norm_vector, self.rel)
        pos_nnorm = tf.nn.embedding_lookup(self.norm_vector, self.neg_rel)

        #Normalize embedding vectors
        pos_h = tf.nn.l2_normalize(pos_h, [1,2])
        pos_t = tf.nn.l2_normalize(pos_t, [1,2])
        pos_r = tf.nn.l2_normalize(pos_r, [1,2])

        pos_nh = tf.nn.l2_normalize(pos_nh, [1,2])
        pos_nt = tf.nn.l2_normalize(pos_nt, [1,2])
        pos_nr = tf.nn.l2_normalize(pos_nr, [1,2])

        pos_norm = tf.nn.l2_normalize(pos_norm, [1,2])
        pos_nnorm = tf.nn.l2_normalize(pos_nnorm, [1,2])

        #Project entities to hyperplane
        pos_h = self._transfer(pos_h, pos_norm)
        pos_t = self._transfer(pos_t, pos_norm)
        pos_nh = self._transfer(pos_nh, pos_nnorm)
        pos_nt = self._transfer(pos_nt, pos_nnorm)
        logger.info("Pos Triple Shapes: %s, %s, %s", str(pos_h.shape), str(pos_t.shape), str(pos_r.shape))
        logger.info("Neg Triple Shapes: %s, %s, %s", str(pos_nh.shape), str(pos_nt.shape), str(pos_nr.shape))

        #Compute Loss
        _p_score = self._calc(pos_h, pos_t, pos_r)
        _n_score = self._calc(pos_nh, pos_nt, pos_nr)

        p_score = tf.reduce_sum(tf.reduce_mean(_p_score, 0, keepdims=False), keepdims=True, axis=1)
        n_score = tf.reduce_sum(tf.reduce_mean(_n_score, 0, keepdims=False), keepdims=True, axis=1)
        logger.info("PScore Shape %s. N_score Shape: %s", str(p_score.shape), str(n_score.shape))

        self.loss = tf.reduce_sum(tf.maximum(p_score - n_score + self.margin, 0))

        #collect summary parameters
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('pos_score', tf.reduce_mean(p_score))
        tf.summary.scalar('neg_score', tf.reduce_mean(n_score))

        #Configure optimizer
        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss)

        #Configure session
        self.sess = tf.Session()

        #Configure summary location
        self.merged = tf.summary.merge_all()
        self.summary_writer = tf.summary.FileWriter(get_tf_summary_file_path(logger), self.sess.graph)

        #Configure Saver
        self.saver = tf.train.Saver()
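Note: a hedged sketch of an epoch loop for the TransH model above. It assumes model.ntriples is laid out so that the k = neg_rate + neg_rel_rate corruptions of positive triple i occupy rows i*k through (i+1)*k - 1, matching the placeholder shapes:

    import numpy as np

    # Hypothetical epoch loop over the pre-built positive/negative triples.
    def run_epoch(model):
        k = model.neg_rate + model.neg_rel_rate
        losses = []
        for i in range(0, len(model.triples), model.batchSize):
            pos = model.triples[i:i + model.batchSize]
            neg = model.ntriples[i * k:(i + model.batchSize) * k]
            feed = {
                model.head: pos[:, 0:1],
                model.tail: pos[:, 1:2],
                model.rel: pos[:, 2:3],
                model.neg_head: neg[:, 0].reshape(-1, k),
                model.neg_tail: neg[:, 1].reshape(-1, k),
                model.neg_rel: neg[:, 2].reshape(-1, k),
            }
            _, loss = model.sess.run([model.optimizer, model.loss],
                                     feed_dict=feed)
            losses.append(loss)
        return float(np.mean(losses))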
Example #5
    def __init__(self,
                 graph_erer,
                 dimension=64,
                 batchSize=128,
                 learning_rate=0.1,
                 alpha=5,
                 margin=1,
                 neg_rate=7,
                 neg_rel_rate=1,
                 regularizer_scale=0.1,
                 beta=5):
        self.entityA = graph_erer.entityA
        self.entityB = graph_erer.entityB
        self.relationA = graph_erer.relationA
        self.relationB = graph_erer.relationB

        self.dimension = dimension  #Embedding Dimension
        self.batchSize = batchSize  #Batch size for Stochastic Gradient Descent
        self.learning_rate = learning_rate  #Learning rate for optimizer
        self.neg_rel_rate = neg_rel_rate
        self.neg_rate = neg_rate
        self.margin = margin
        self.alpha = alpha
        self.beta = beta

        # List of triples. Remove last incomplete batch if any.
        self.triplesA = np.array(
            graph_erer.triplesA[0:(len(graph_erer.triplesA) -
                                   len(graph_erer.triplesA) % batchSize)])
        self.triplesB = np.array(
            graph_erer.triplesB[0:(len(graph_erer.triplesB) -
                                   len(graph_erer.triplesB) % batchSize)])
        logger.info("Shape of triples A: %s", str(self.triplesA.shape))
        logger.info("Shape of triples B: %s", str(self.triplesB.shape))

        #Collect Negative Samples
        self.ntriplesA = np.array(
            get_negative_samples(graph_erer.triplesA,
                                 len(self.entityA),
                                 len(self.entityA),
                                 len(self.relationA),
                                 graph_erer.entity_pairs,
                                 neg_rate=neg_rate,
                                 neg_rel_rate=neg_rel_rate))
        self.ntriplesB = np.array(
            get_negative_samples(graph_erer.triplesB,
                                 len(self.entityB),
                                 len(self.entityB),
                                 len(self.relationB),
                                 graph_erer.entity_pairs,
                                 neg_rate=neg_rate,
                                 neg_rel_rate=neg_rel_rate))
        logger.info("Shape of negative triples A: %s",
                    str(self.ntriplesA.shape))
        logger.info("Shape of negative triples B: %s",
                    str(self.ntriplesB.shape))

        # List of ILLs / linked entities. Remove last incomplete batch if any.
        self.prior_pairs = np.array(graph_erer.prior_pairs[0:(
            len(graph_erer.prior_pairs) -
            len(graph_erer.prior_pairs) % batchSize)])
        dummy_rel = -1  #Dummy relation id, used only to reuse the negative-sampling method for triples.
        pp_triples = [(a, b, dummy_rel) for a, b in self.prior_pairs]
        neg_pp_triples = np.array(
            get_negative_samples(pp_triples,
                                 len(self.entityA),
                                 len(self.entityB),
                                 1,
                                 graph_erer.entity_pairs,
                                 neg_rate=neg_rate,
                                 neg_rel_rate=0))
        #Note: neg_rel_rate is 0 because we don't want to replace r but only h or t.

        self.neg_prior_pairs = np.array([(h, t)
                                         for (h, t, r) in neg_pp_triples])
        logger.info("Shape of prior_pairs: %s", str(self.prior_pairs.shape))
        logger.info("Shape of negative prior_pairs: %s",
                    str(self.neg_prior_pairs.shape))

        self.unique_rels = list(set(self.relationA + self.relationB))
        logger.info("No. of unique relations: %s", len(self.unique_rels))
        self.relA_to_urel_map = {}
        for i in range(len(graph_erer.relationA)):
            self.relA_to_urel_map[i] = self.unique_rels.index(
                graph_erer.relationA[i])
        self.relB_to_urel_map = {}
        for i in range(len(graph_erer.relationB)):
            self.relB_to_urel_map[i] = self.unique_rels.index(
                graph_erer.relationB[i])

        #Generate Evolution Pairs from Prior Pairs
        self.evolution_pairs = []
        for (a, b) in self.prior_pairs:
            a_triples = [(h, t, r) for (h, t, r) in self.triplesA if h == a]
            b_triples = [(h, t, r) for (h, t, r) in self.triplesB if h == b]
            for (ah, at, ar) in a_triples:
                unique_rel_indexA = self.relA_to_urel_map[int(ar)]
                bt = [t for (h, t, r) in b_triples if unique_rel_indexA == \
                                self.relB_to_urel_map[int(r)]]
                if len(bt):
                    self.evolution_pairs.append((at, bt[0], unique_rel_indexA))

        self.evolution_pairs = np.array(self.evolution_pairs[0:(
            len(self.evolution_pairs) -
            len(self.evolution_pairs) % self.batchSize)])
        self.neg_evolution_pairs = np.array(
            get_negative_samples(self.evolution_pairs,
                                 len(self.entityA),
                                 len(self.entityB),
                                 1,
                                 graph_erer.entity_pairs,
                                 neg_rate=neg_rate,
                                 neg_rel_rate=0))
        #Note: neg_rel_rate is 0 because we don't want to replace r but only h or t.

        logger.info("No. of evolution_pairs: %s",
                    str(self.evolution_pairs.shape))
        logger.info("Shape of negative evolution_pairs: %s",
                    str(self.neg_evolution_pairs.shape))
        #Define Embedding Variables
        initializer = tf.contrib.layers.xavier_initializer(uniform=True)
        regularizer = tf.contrib.layers.l2_regularizer(scale=regularizer_scale)
        self.ent_embeddings_A = tf.get_variable(
            name="ent_embeddings_A",
            shape=[len(self.entityA), dimension],
            initializer=initializer,
            regularizer=regularizer)
        self.ent_embeddings_B = tf.get_variable(
            name="ent_embeddings_B",
            shape=[len(self.entityB), dimension],
            initializer=initializer,
            regularizer=regularizer)
        self.rel_embeddings_A = tf.get_variable(
            name="rel_embeddings_A",
            shape=[len(self.relationA), dimension],
            initializer=initializer,
            regularizer=regularizer)
        self.rel_embeddings_B = tf.get_variable(
            name="rel_embeddings_B",
            shape=[len(self.relationB), dimension],
            initializer=initializer,
            regularizer=regularizer)
        self.projection_matrix = tf.get_variable(name="projection_matrix",
                                                 shape=[dimension, dimension],
                                                 initializer=initializer,
                                                 regularizer=regularizer)
        self.evolution_vectors = tf.get_variable(
            name="evolution_vectors",
            shape=[len(self.unique_rels), dimension],
            initializer=initializer,
            regularizer=regularizer)

        #Define Placeholders for input
        self.headA = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.tailA = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.relA = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.neg_headA = tf.placeholder(
            tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])
        self.neg_tailA = tf.placeholder(
            tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])
        self.neg_relA = tf.placeholder(
            tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])

        self.headB = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.tailB = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.relB = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.neg_headB = tf.placeholder(
            tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])
        self.neg_tailB = tf.placeholder(
            tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])
        self.neg_relB = tf.placeholder(
            tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])

        self.ent_A = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.ent_B = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.neg_ent_A = tf.placeholder(tf.int32,
                                        shape=[self.batchSize, neg_rate])
        self.neg_ent_B = tf.placeholder(tf.int32,
                                        shape=[self.batchSize, neg_rate])

        self.evolve_A = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.evolve_B = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.evolve_rel = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.neg_evolve_A = tf.placeholder(tf.int32,
                                           shape=[self.batchSize, neg_rate])
        self.neg_evolve_B = tf.placeholder(tf.int32,
                                           shape=[self.batchSize, neg_rate])
        self.neg_evolve_rel = tf.placeholder(tf.int32,
                                             shape=[self.batchSize, neg_rate])

        pos_ha = tf.nn.embedding_lookup(self.ent_embeddings_A, self.headA)
        pos_ta = tf.nn.embedding_lookup(self.ent_embeddings_A, self.tailA)
        pos_ra = tf.nn.embedding_lookup(self.rel_embeddings_A, self.relA)
        pos_nha = tf.nn.embedding_lookup(self.ent_embeddings_A, self.neg_headA)
        pos_nta = tf.nn.embedding_lookup(self.ent_embeddings_A, self.neg_tailA)
        pos_nra = tf.nn.embedding_lookup(self.rel_embeddings_A, self.neg_relA)

        pos_hb = tf.nn.embedding_lookup(self.ent_embeddings_B, self.headB)
        pos_tb = tf.nn.embedding_lookup(self.ent_embeddings_B, self.tailB)
        pos_rb = tf.nn.embedding_lookup(self.rel_embeddings_B, self.relB)
        pos_nhb = tf.nn.embedding_lookup(self.ent_embeddings_B, self.neg_headB)
        pos_ntb = tf.nn.embedding_lookup(self.ent_embeddings_B, self.neg_tailB)
        pos_nrb = tf.nn.embedding_lookup(self.rel_embeddings_B, self.neg_relB)

        pos_entA = tf.nn.embedding_lookup(self.ent_embeddings_A, self.ent_A)
        pos_entB = tf.nn.embedding_lookup(self.ent_embeddings_B, self.ent_B)
        pos_nentA = tf.nn.embedding_lookup(self.ent_embeddings_A,
                                           self.neg_ent_A)
        pos_nentB = tf.nn.embedding_lookup(self.ent_embeddings_B,
                                           self.neg_ent_B)
        pos_ent_proj = tf.nn.embedding_lookup(self.projection_matrix,
                                              range(0, dimension))

        pos_evolA = tf.nn.embedding_lookup(self.ent_embeddings_A,
                                           self.evolve_A)
        pos_evolB = tf.nn.embedding_lookup(self.ent_embeddings_B,
                                           self.evolve_B)
        pos_evolve_vec = tf.nn.embedding_lookup(self.evolution_vectors,
                                                self.evolve_rel)
        pos_nevolA = tf.nn.embedding_lookup(self.ent_embeddings_A,
                                            self.neg_evolve_A)
        pos_nevolB = tf.nn.embedding_lookup(self.ent_embeddings_B,
                                            self.neg_evolve_B)
        pos_nevolve_vec = tf.nn.embedding_lookup(self.evolution_vectors,
                                                 self.neg_evolve_rel)

        #Normalize embeddings
        pos_ha = tf.nn.l2_normalize(pos_ha, [1, 2])
        pos_ta = tf.nn.l2_normalize(pos_ta, [1, 2])
        pos_ra = tf.nn.l2_normalize(pos_ra, [1, 2])
        pos_nha = tf.nn.l2_normalize(pos_nha, [1, 2])
        pos_nta = tf.nn.l2_normalize(pos_nta, [1, 2])
        pos_nra = tf.nn.l2_normalize(pos_nra, [1, 2])

        pos_hb = tf.nn.l2_normalize(pos_hb, [1, 2])
        pos_tb = tf.nn.l2_normalize(pos_tb, [1, 2])
        pos_rb = tf.nn.l2_normalize(pos_rb, [1, 2])
        pos_nhb = tf.nn.l2_normalize(pos_nhb, [1, 2])
        pos_ntb = tf.nn.l2_normalize(pos_ntb, [1, 2])
        pos_nrb = tf.nn.l2_normalize(pos_nrb, [1, 2])

        pos_entA = tf.nn.l2_normalize(pos_entA, [1, 2])
        pos_entB = tf.nn.l2_normalize(pos_entB, [1, 2])
        pos_nentA = tf.nn.l2_normalize(pos_nentA, [1, 2])
        pos_nentB = tf.nn.l2_normalize(pos_nentB, [1, 2])
        pos_ent_proj = tf.nn.l2_normalize(pos_ent_proj, 1)

        pos_evolA = tf.nn.l2_normalize(pos_evolA, [1, 2])
        pos_evolB = tf.nn.l2_normalize(pos_evolB, [1, 2])
        pos_evolve_vec = tf.nn.l2_normalize(pos_evolve_vec, 1)
        pos_nevolA = tf.nn.l2_normalize(pos_nevolA, [1, 2])
        pos_nevolB = tf.nn.l2_normalize(pos_nevolB, [1, 2])
        pos_nevolve_vec = tf.nn.l2_normalize(pos_nevolve_vec, 1)

        logger.info("Triple Shapes A: %s, %s, %s", str(pos_ha.shape),
                    str(pos_ta.shape), str(pos_ra.shape))
        logger.info("Triple Shapes B: %s, %s, %s", str(pos_hb.shape),
                    str(pos_tb.shape), str(pos_rb.shape))
        logger.info("Prior Pairs Shape: %s, %s, %s", str(pos_entA.shape),
                    str(pos_entB.shape), str(pos_ent_proj.shape))
        logger.info("Evolution Pairs Shape: %s, %s, %s", str(pos_evolA.shape),
                    str(pos_evolB.shape), str(pos_evolve_vec.shape))

        #Compute loss: knowledge model
        _ap_score = self._calc(pos_ha, pos_ta, pos_ra)
        _an_score = self._calc(pos_nha, pos_nta, pos_nra)
        #Note: can also use: tf.abs(tf.subtract(tf.add(pos_ha, pos_ra), pos_ta))

        ap_score = tf.reduce_sum(tf.reduce_mean(_ap_score, 1, keepdims=False),
                                 1,
                                 keepdims=True)
        an_score = tf.reduce_sum(tf.reduce_mean(_an_score, 1, keepdims=False),
                                 1,
                                 keepdims=True)

        _bp_score = self._calc(pos_hb, pos_tb, pos_rb)
        _bn_score = self._calc(pos_nhb, pos_ntb, pos_nrb)

        bp_score = tf.reduce_sum(tf.reduce_mean(_bp_score, 1, keepdims=False),
                                 1,
                                 keepdims=True)
        bn_score = tf.reduce_sum(tf.reduce_mean(_bn_score, 1, keepdims=False),
                                 1,
                                 keepdims=True)

        self.a_score = tf.reduce_sum(
            tf.maximum(ap_score - an_score + self.margin, 0))
        self.b_score = tf.reduce_sum(
            tf.maximum(bp_score - bn_score + self.margin, 0))
        logger.info("AScore Shape %s. Bscore Shape: %s",
                    str(self.a_score.shape), str(self.b_score.shape))

        self.optimizer = tf.train.GradientDescentOptimizer(
            self.learning_rate)  #AdamOptimizer(self.learning_rate)
        self.optimizer_A = self.optimizer.minimize(
            self.a_score,
            var_list=[self.ent_embeddings_A, self.rel_embeddings_A])
        self.optimizer_B = self.optimizer.minimize(
            self.b_score,
            var_list=[self.ent_embeddings_B, self.rel_embeddings_B])

        #Compute loss: alignment model
        _p_alignment_loss = self._calc_alignment(
            tf.reshape(pos_entA, [self.batchSize, self.dimension]),
            tf.reshape(pos_entB, [self.batchSize, self.dimension]),
            pos_ent_proj)
        _n_alignment_loss = self._calc_alignment(
            tf.reshape(pos_nentA, [self.batchSize * neg_rate, self.dimension]),
            tf.reshape(pos_nentB, [self.batchSize * neg_rate, self.dimension]),
            pos_ent_proj)
        #Note: can also use: tf.abs(tf.subtract(tf.matmul(pos_entA, pos_ent_proj), pos_entB))

        _p_alignment_loss = tf.reshape(_p_alignment_loss,
                                       [self.batchSize, 1, self.dimension])
        _n_alignment_loss = tf.reshape(
            _n_alignment_loss, [self.batchSize, neg_rate, self.dimension])

        logger.info("Shape of P Align Score: %s", str(_p_alignment_loss.shape))
        logger.info("Shape of N Align Score: %s", str(_n_alignment_loss.shape))
        p_alignment_loss = tf.reduce_sum(tf.reduce_mean(_p_alignment_loss,
                                                        1,
                                                        keepdims=False),
                                         1,
                                         keepdims=True)
        n_alignment_loss = tf.reduce_sum(tf.reduce_mean(_n_alignment_loss,
                                                        1,
                                                        keepdims=False),
                                         1,
                                         keepdims=True)

        self.alignment_loss = tf.reduce_sum(
            tf.maximum(p_alignment_loss - n_alignment_loss + self.margin, 0))
        logger.info("Alignment loss Shape %s.", str(self.alignment_loss.shape))

        #Configure optimizer
        self.optimizer_AM = self.optimizer.minimize(
            self.alignment_loss * self.alpha,
            var_list=[
                self.projection_matrix, self.ent_embeddings_A,
                self.ent_embeddings_B
            ])

        logger.info("Shape of PosA Evolve: %s", str(pos_evolA.shape))
        logger.info("Shape of PosB Evolve: %s", str(pos_evolB.shape))
        logger.info("Shape of project Evolve: %s", str(pos_ent_proj.shape))
        logger.info("Shape of Evolve Vector: %s", str(pos_evolve_vec.shape))
        #Compute Evolution Loss
        _p_evolve_score = self._calc_evolve(
            tf.reshape(pos_evolA, [self.batchSize, self.dimension]),
            tf.reshape(pos_evolB, [self.batchSize, self.dimension]),
            pos_ent_proj,
            tf.reshape(pos_evolve_vec, [self.batchSize, self.dimension]))

        _n_evolve_score = self._calc_evolve(
            tf.reshape(pos_nevolA,
                       [self.batchSize * neg_rate, self.dimension]),
            tf.reshape(pos_nevolB,
                       [self.batchSize * neg_rate, self.dimension]),
            pos_ent_proj,
            tf.reshape(pos_nevolve_vec,
                       [self.batchSize * neg_rate, self.dimension]))

        _p_evolve_score = tf.reshape(_p_evolve_score,
                                     [self.batchSize, 1, self.dimension])
        _n_evolve_score = tf.reshape(
            _n_evolve_score, [self.batchSize, neg_rate, self.dimension])

        logger.info("Shape of P Evolve Score: %s", str(_p_evolve_score.shape))
        logger.info("Shape of N Evolve Score: %s", str(_n_evolve_score.shape))
        p_evolve_score = tf.reduce_sum(tf.reduce_mean(_p_evolve_score,
                                                      1,
                                                      keepdims=False),
                                       1,
                                       keepdims=True)
        n_evolve_score = tf.reduce_sum(tf.reduce_mean(_n_evolve_score,
                                                      1,
                                                      keepdims=False),
                                       1,
                                       keepdims=True)

        self.evolution_loss = tf.reduce_sum(
            tf.maximum(p_evolve_score - n_evolve_score + self.margin, 0))
        logger.info("Evolution loss Shape %s.", str(self.evolution_loss.shape))

        #Configure optimizer
        self.optimizer_evolve = self.optimizer.minimize(
            self.evolution_loss * self.beta,
            var_list=[
                self.projection_matrix, self.ent_embeddings_A,
                self.ent_embeddings_B, self.evolution_vectors
            ])

        #Configure session
        self.sess = tf.Session()

        #Collect summary for tensorboard
        tf.summary.scalar('p_alignment_loss',
                          tf.reduce_mean(p_alignment_loss),
                          collections=['align'])
        tf.summary.scalar('n_alignment_loss',
                          tf.reduce_mean(n_alignment_loss),
                          collections=['align'])
        tf.summary.scalar('alignment_loss',
                          self.alignment_loss,
                          collections=['align'])
        tf.summary.scalar('bp_score',
                          tf.reduce_mean(bp_score),
                          collections=['b'])
        tf.summary.scalar('bn_score',
                          tf.reduce_mean(bn_score),
                          collections=['b'])
        tf.summary.scalar('b_loss', self.b_score, collections=['b'])
        tf.summary.scalar('ap_score',
                          tf.reduce_mean(ap_score),
                          collections=['a'])
        tf.summary.scalar('an_score',
                          tf.reduce_mean(an_score),
                          collections=['a'])
        tf.summary.scalar('a_loss', self.a_score, collections=['a'])
        tf.summary.scalar('n_evolve_score',
                          tf.reduce_mean(n_evolve_score),
                          collections=['evolve'])
        tf.summary.scalar('p_evolve_score',
                          tf.reduce_mean(p_evolve_score),
                          collections=['evolve'])
        tf.summary.scalar('evolution_loss',
                          self.evolution_loss,
                          collections=['evolve'])

        self.merged_align = tf.summary.merge_all(key='align')
        self.merged_a = tf.summary.merge_all(key='a')
        self.merged_b = tf.summary.merge_all(key='b')
        self.merged_evolve = tf.summary.merge_all(key='evolve')
        self.align_summary_writer = tf.summary.FileWriter(
            get_tf_summary_file_path(logger) + '_align', self.sess.graph)
        self.a_summary_writer = tf.summary.FileWriter(
            get_tf_summary_file_path(logger) + '_a', self.sess.graph)
        self.b_summary_writer = tf.summary.FileWriter(
            get_tf_summary_file_path(logger) + '_b', self.sess.graph)
        self.evolve_summary_writer = tf.summary.FileWriter(
            get_tf_summary_file_path(logger) + '_evolve', self.sess.graph)
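Note: _calc_alignment and _calc_evolve are not shown. Following the inline note above ("can also use: tf.abs(tf.subtract(tf.matmul(pos_entA, pos_ent_proj), pos_entB))"), a minimal sketch of the two scores; the signatures and the evolution formulation are assumptions:

    import tensorflow as tf

    # Hypothetical sketches of the alignment and evolution residuals.
    def calc_alignment(ent_a, ent_b, proj):
        # MTransE-style alignment residual |A . M - B|;
        # ent_a, ent_b: [n, d], proj: [d, d] shared projection matrix.
        return tf.abs(tf.matmul(ent_a, proj) - ent_b)

    def calc_evolve(ent_a, ent_b, proj, evolve_vec):
        # Adds a per-relation evolution translation before comparing:
        # |A . M + v_rel - B|.
        return tf.abs(tf.matmul(ent_a, proj) + evolve_vec - ent_b)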
Example #6
    def __init__(self,
                 graph_veg,
                 dimension=10,
                 batchSize=100,
                 learning_rate=0.1,
                 margin=1,
                 regularizer_scale=0.1,
                 neg_rate=1,
                 neg_rel_rate=0):
        logger.info("Begin generating RLTransE embeddings with dimension : %d",
                    dimension)

        self.dimension = dimension  #Embedding Dimension
        self.batchSize = batchSize  #BatchSize for Stochastic Gradient Decent
        self.learning_rate = learning_rate  #Learning rate for optmizer
        self.margin = margin  #margin or bias used for loss computation
        self.relation_value_map = graph_veg.relation_value_map  #List of entities in Knowledge graph
        self.relation = graph_veg.relation  #List of relationships in Knowledge Graph
        self.neg_rate = neg_rate  #Number of Negative samples to generate by replacing head or tail
        self.neg_rel_rate = neg_rel_rate  #Number fo negative samples by replacing realtion

        # List of triples. Remove last incomplete batch if any.
        self.triples = np.array(graph_veg.train_triples[0:(
            len(graph_veg.train_triples) -
            len(graph_veg.train_triples) % batchSize)])
        self.ntriples = []
        for index in range(len(self.relation)):
            rel_triples = [(h, t, r) for (h, t, r) in self.triples
                           if r == index]
            val_count = len(self.relation_value_map[self.relation[index]])
            self.ntriples.extend(
                get_negative_samples(rel_triples,
                                     val_count,
                                     val_count,
                                     len(self.relation), [],
                                     neg_rate=neg_rate,
                                     neg_rel_rate=neg_rel_rate))
        self.ntriples = np.array(self.ntriples)
        logger.info("Shape of triples: %s", str(self.triples.shape))
        logger.info("Shape of neg triples: %s", str(self.ntriples.shape))

        self.val_triples = np.array(
            graph_veg.val_triples[0:(len(graph_veg.val_triples) -
                                     len(graph_veg.val_triples) % batchSize)])
        self.val_ntriples = []
        for index in range(len(self.relation)):
            rel_triples = [(h, t, r) for (h, t, r) in self.val_triples
                           if r == index]
            value_count = len(self.relation_value_map[self.relation[index]])
            self.val_ntriples.extend(
                get_negative_samples(rel_triples,
                                     value_count,
                                     value_count,
                                     len(self.relation), [],
                                     neg_rate=neg_rate,
                                     neg_rel_rate=neg_rel_rate))
        self.val_ntriples = np.array(self.val_ntriples)
        logger.info("Shape of val triples: %s", str(self.val_triples.shape))
        logger.info("Shape of val neg triples: %s",
                    str(self.val_ntriples.shape))

        #Define Embedding Variables
        initializer = tf.contrib.layers.xavier_initializer(uniform=True)
        regularizer = tf.contrib.layers.l2_regularizer(scale=regularizer_scale)

        self.max_val_count = 0
        for index in range(len(self.relation)):
            val_count = len(self.relation_value_map[self.relation[index]])
            if val_count > self.max_val_count:
                self.max_val_count = val_count

        self.val_embeddings = tf.get_variable(
            name="val_embeddings",
            shape=[len(self.relation) * self.max_val_count, dimension],
            initializer=initializer,
            regularizer=regularizer)
        self.rel_embeddings = tf.get_variable(
            name="rel_embeddings",
            shape=[len(self.relation), dimension],
            initializer=initializer,
            regularizer=regularizer)

        #Define Placeholders for input
        self.head = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.tail = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.rel = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.neg_head = tf.placeholder(
            tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])
        self.neg_tail = tf.placeholder(
            tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])
        self.neg_rel = tf.placeholder(
            tf.int32, shape=[self.batchSize, (neg_rate + neg_rel_rate)])

        pos_h = tf.nn.embedding_lookup(
            self.val_embeddings, self.head + self.rel * self.max_val_count)
        pos_t = tf.nn.embedding_lookup(
            self.val_embeddings, self.tail + self.rel * self.max_val_count)
        pos_r = tf.nn.embedding_lookup(self.rel_embeddings, self.rel)
        pos_nh = tf.nn.embedding_lookup(
            self.val_embeddings,
            self.neg_head + self.neg_rel * self.max_val_count)
        pos_nt = tf.nn.embedding_lookup(
            self.val_embeddings,
            self.neg_tail + self.neg_rel * self.max_val_count)
        pos_nr = tf.nn.embedding_lookup(self.rel_embeddings, self.neg_rel)

        #Normalize embeddings
        pos_h = tf.nn.l2_normalize(pos_h, [1, 2])
        pos_t = tf.nn.l2_normalize(pos_t, [1, 2])
        pos_r = tf.nn.l2_normalize(pos_r, [1, 2])
        pos_nh = tf.nn.l2_normalize(pos_nh, [1, 2])
        pos_nt = tf.nn.l2_normalize(pos_nt, [1, 2])
        pos_nr = tf.nn.l2_normalize(pos_nr, [1, 2])

        logger.info("Pos Triple Shapes: %s, %s, %s", str(pos_h.shape),
                    str(pos_t.shape), str(pos_r.shape))
        logger.info("Neg Triple Shapes: %s, %s, %s", str(pos_nh.shape),
                    str(pos_nt.shape), str(pos_nr.shape))

        #Compute loss
        _p_score = self._calc(pos_h, pos_t, pos_r)
        _n_score = self._calc(pos_nh, pos_nt, pos_nr)

        p_score = tf.reduce_sum(tf.reduce_mean(_p_score, 1, keepdims=False),
                                1,
                                keepdims=True)
        n_score = tf.reduce_sum(tf.reduce_mean(_n_score, 1, keepdims=False),
                                1,
                                keepdims=True)

        logger.info("PScore Shape %s. N_score Shape: %s", str(p_score.shape),
                    str(n_score.shape))

        self.loss = tf.reduce_sum(
            tf.maximum(p_score - n_score + self.margin, 0))

        #collect summary parameters
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('pos_score', tf.reduce_mean(p_score))
        tf.summary.scalar('neg_score', tf.reduce_mean(n_score))

        #Configure optimizer
        self.optimizer = tf.train.GradientDescentOptimizer(
            self.learning_rate).minimize(self.loss)

        #Configure session
        self.sess = tf.Session()

        #Configure summary location
        self.merged = tf.summary.merge_all()
        self.summary_writer = tf.summary.FileWriter(
            get_tf_summary_file_path(logger) + '/train', self.sess.graph)
        self.validation_summary_writer = tf.summary.FileWriter(
            get_tf_summary_file_path(logger) + '/val', self.sess.graph)

        #Configure Saver
        self.saver = tf.train.Saver()
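Note: in Example 6 the lookup head + rel * max_val_count packs each relation's value vocabulary into a single embedding table, with relation r's values occupying rows r * max_val_count through r * max_val_count + val_count - 1. A tiny worked check of the offset arithmetic (hypothetical sizes):

    # Hypothetical sizes, to illustrate the packed value-embedding indexing.
    max_val_count = 1000  # rows reserved per relation
    rel, head = 2, 37     # relation index 2, value index 37
    row = head + rel * max_val_count
    assert row == 2037    # value 37 of relation 2 lives at row 2037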
Example #7
    def __init__(self,
                 graph_ear,
                 dimension=10,
                 learning_rate=0.1,
                 batchSize=100,
                 margin=1,
                 regularizer_scale=0.1,
                 neg_rate=1,
                 neg_rel_rate=0):
        self.entity = graph_ear.entity
        self.attribute = graph_ear.attribute
        self.relation = graph_ear.relation
        self.value = graph_ear.value
        self.dimension = dimension
        self.learning_rate = learning_rate
        self.batchSize = batchSize
        self.margin = margin
        self.neg_rate = neg_rate
        self.neg_rel_rate = neg_rel_rate
        self.entity_pairs = graph_ear.entity_pairs
        self.true_pairs = graph_ear.true_pairs

        #Build attr weight vector
        self.attr_weights = []
        for a in self.attribute:
            self.attr_weights.append(SEEA.special_attr_weight_dict.get(a, 1.0))
        logger.info("Using attr weights as: %s", str(self.attr_weights))
        self.attr_weights = tf.constant(self.attr_weights, dtype=tf.float32)
        logger.info("Attr wt Tensor: %s", str(self.attr_weights))

        # List of triples. Remove last incomplete batch if any.
        self.atriples = np.array(
            graph_ear.atriples[0:(len(graph_ear.atriples) -
                                  len(graph_ear.atriples) % batchSize)])
        self.rtriples = np.array(
            graph_ear.rtriples[0:(len(graph_ear.rtriples) -
                                  len(graph_ear.rtriples) % batchSize)])
        logger.info("Modified Atriples size: %d", len(self.atriples))
        logger.info("Modified Rtriples size: %d", len(self.rtriples))

        #Collect Negative Samples
        self.natriples = np.array(
            get_negative_samples(self.atriples,
                                 len(self.entity),
                                 len(self.value),
                                 len(self.attribute), [],
                                 neg_rate=neg_rate,
                                 neg_rel_rate=neg_rel_rate))
        self.nrtriples = np.array(
            get_negative_samples(self.rtriples,
                                 len(self.entity),
                                 len(self.entity),
                                 len(self.relation),
                                 self.entity_pairs,
                                 neg_rate=neg_rate,
                                 neg_rel_rate=neg_rel_rate))

        #Define Embedding Variables
        initializer = tf.contrib.layers.xavier_initializer(uniform=False)
        regularizer = tf.contrib.layers.l2_regularizer(scale=regularizer_scale)

        self.ent_embeddings = tf.get_variable(
            name="ent_embeddings",
            shape=[len(self.entity), dimension],
            initializer=initializer,
            regularizer=regularizer)
        self.rel_embeddings = tf.get_variable(
            name="rel_embeddings",
            shape=[len(self.relation), dimension],
            initializer=initializer,
            regularizer=regularizer)
        self.attr_embeddings = tf.get_variable(
            name="attr_embeddings",
            shape=[len(self.attribute), dimension],
            initializer=initializer,
            regularizer=regularizer)
        self.val_embeddings = tf.get_variable(
            name="val_embeddings",
            shape=[len(self.value), dimension],
            initializer=initializer,
            regularizer=regularizer)
        self.projection_matrix = tf.get_variable(
            name="projection_matrix",
            shape=[len(self.attribute), dimension],
            initializer=initializer,
            regularizer=regularizer)

        #Define Placeholders for input
        self.head = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.tail = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.rel = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.neg_head = tf.placeholder(
            tf.int32,
            shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])
        self.neg_tail = tf.placeholder(
            tf.int32,
            shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])
        self.neg_rel = tf.placeholder(
            tf.int32,
            shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])

        self.attr_head = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.val = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.attr = tf.placeholder(tf.int32, shape=[self.batchSize, 1])
        self.neg_attr_head = tf.placeholder(
            tf.int32,
            shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])
        self.neg_val = tf.placeholder(
            tf.int32,
            shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])
        self.neg_attr = tf.placeholder(
            tf.int32,
            shape=[self.batchSize, (self.neg_rel_rate + self.neg_rate)])

        #Load Attr Weights
        # tf.gather indexes attr_weights with the attribute-id tensors; this is
        # equivalent to a nested tf.map_fn lookup but a single cheap op.
        self.p_attr_wt = tf.gather(self.attr_weights, self.attr)
        self.n_attr_wt = tf.gather(self.attr_weights, self.neg_attr)
        logger.info("Tensor of Pos Attr Wt.: %s", str(self.p_attr_wt))
        logger.info("Tensor of Neg Attr Wt.: %s", str(self.n_attr_wt))
        # Broadcast each scalar weight across the embedding dimension so it can
        # be multiplied element-wise with the per-dimension attribute scores.
        # (The weights are already float32, so no cast is needed.)
        self.p_attr_wt = tf.tile(tf.expand_dims(self.p_attr_wt, 2),
                                 [1, 1, self.dimension])
        self.n_attr_wt = tf.tile(tf.expand_dims(self.n_attr_wt, 2),
                                 [1, 1, self.dimension])
        logger.info("Tensor of Pos Attr Wt. after tiling: %s", str(self.p_attr_wt))
        logger.info("Tensor of Neg Attr Wt. after tiling: %s", str(self.n_attr_wt))

        #Load Embedding Vectors
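        # Note: despite the pos_n* names, the lookups below that use the
        # self.neg_* placeholders retrieve the *negative* samples' embeddings.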
        pos_h = tf.nn.embedding_lookup(self.ent_embeddings, self.head)
        pos_t = tf.nn.embedding_lookup(self.ent_embeddings, self.tail)
        pos_r = tf.nn.embedding_lookup(self.rel_embeddings, self.rel)
        pos_nh = tf.nn.embedding_lookup(self.ent_embeddings, self.neg_head)
        pos_nt = tf.nn.embedding_lookup(self.ent_embeddings, self.neg_tail)
        pos_nr = tf.nn.embedding_lookup(self.rel_embeddings, self.neg_rel)
        pos_attr_h = tf.nn.embedding_lookup(self.ent_embeddings,
                                            self.attr_head)
        pos_val = tf.nn.embedding_lookup(self.val_embeddings, self.val)
        pos_attr = tf.nn.embedding_lookup(self.attr_embeddings, self.attr)
        pos_attr_nh = tf.nn.embedding_lookup(self.ent_embeddings,
                                             self.neg_attr_head)
        pos_attr_nv = tf.nn.embedding_lookup(self.val_embeddings, self.neg_val)
        pos_attr_na = tf.nn.embedding_lookup(self.attr_embeddings,
                                             self.neg_attr)
        pos_proj = tf.nn.embedding_lookup(self.projection_matrix, self.attr)
        pos_nproj = tf.nn.embedding_lookup(self.projection_matrix,
                                           self.neg_attr)

        #Normalize Vectors
        pos_h = tf.nn.l2_normalize(pos_h, [1, 2])
        pos_t = tf.nn.l2_normalize(pos_t, [1, 2])
        pos_r = tf.nn.l2_normalize(pos_r, [1, 2])
        pos_nh = tf.nn.l2_normalize(pos_nh, [1, 2])
        pos_nt = tf.nn.l2_normalize(pos_nt, [1, 2])
        pos_nr = tf.nn.l2_normalize(pos_nr, [1, 2])
        pos_attr_h = tf.nn.l2_normalize(pos_attr_h, [1, 2])
        pos_val = tf.nn.l2_normalize(pos_val, [1, 2])
        pos_attr = tf.nn.l2_normalize(pos_attr, [1, 2])
        pos_attr_nh = tf.nn.l2_normalize(pos_attr_nh, [1, 2])
        pos_attr_nv = tf.nn.l2_normalize(pos_attr_nv, [1, 2])
        pos_attr_na = tf.nn.l2_normalize(pos_attr_na, [1, 2])
        pos_proj = tf.nn.l2_normalize(pos_proj, [1, 2])
        pos_nproj = tf.nn.l2_normalize(pos_nproj, [1, 2])

        #Project Entities to attribute space
        proj_pos_attr_h = self._transfer(pos_attr_h, pos_proj)
        proj_pos_attr_nh = self._transfer(pos_attr_nh, pos_nproj)
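        # NOTE: _transfer is defined elsewhere on this class. Since
        # projection_matrix holds one d-dimensional vector per attribute, a
        # plausible TransH-style definition (an assumption, not confirmed by
        # this example) projects the entity onto the hyperplane whose normal
        # is the attribute's projection vector:
        #
        #   def _transfer(self, e, n):
        #       n = tf.nn.l2_normalize(n, -1)
        #       return e - tf.reduce_sum(e * n, -1, keepdims=True) * n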

        #Compute Loss
        _p_score = self._calc(pos_h, pos_t, pos_r)
        _n_score = self._calc(pos_nh, pos_nt, pos_nr)

        _ap_score = self._attr_calc(proj_pos_attr_h, pos_val, pos_attr)
        _an_score = self._attr_calc(proj_pos_attr_nh, pos_attr_nv, pos_attr_na)
        logger.info("Shape of APSCORE.: %s", str(_ap_score.shape))
        logger.info("Shape of ANSCORE.: %s", str(_an_score.shape))

        _wap_score = tf.math.multiply(_ap_score, self.p_attr_wt)
        _wan_score = tf.math.multiply(_an_score, self.n_attr_wt)
        logger.info("Shape of APSCORE.: %s", str(_wap_score.shape))
        logger.info("Shape of ANSCORE.: %s", str(_wan_score.shape))

        p_score = tf.reduce_sum(tf.reduce_mean(_p_score, 1, keepdims=False),
                                axis=1,
                                keepdims=True)
        n_score = tf.reduce_sum(tf.reduce_mean(_n_score, 1, keepdims=False),
                                axis=1,
                                keepdims=True)
        ap_score = tf.reduce_sum(tf.reduce_mean(_wap_score, 1, keepdims=False),
                                 axis=1,
                                 keepdims=True)
        an_score = tf.reduce_sum(tf.reduce_mean(_wan_score, 1, keepdims=False),
                                 axis=1,
                                 keepdims=True)
        logger.info("Shape of APSCORE*.: %s", str(ap_score.shape))
        logger.info("Shape of ANSCORE*.: %s", str(an_score.shape))
        self.rel_loss = tf.reduce_sum(
            tf.maximum(p_score - n_score + self.margin, 0))
        self.attr_loss = tf.reduce_sum(
            tf.maximum(ap_score - an_score + self.margin, 0))
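        # Margin-based ranking loss: for each batch row,
        #   max(0, margin + score(positive) - score(negative)),
        # summed over the batch, which pushes each positive triple to score at
        # least `margin` lower than its corrupted counterpart.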

        #Configure optimizer
        self.rel_optimizer = tf.train.GradientDescentOptimizer(
            self.learning_rate).minimize(self.rel_loss)
        self.attr_optimizer = tf.train.GradientDescentOptimizer(
            self.learning_rate).minimize(self.attr_loss)

        #Configure session
        self.sess = tf.Session()

        #Collect summary for tensorboard
        tf.summary.scalar('attr_loss', self.attr_loss, collections=['attr'])
        tf.summary.scalar('rel_loss', self.rel_loss, collections=['rel'])
        tf.summary.scalar('p_score',
                          tf.reduce_mean(p_score),
                          collections=['rel'])
        tf.summary.scalar('n_score',
                          tf.reduce_mean(n_score),
                          collections=['rel'])
        tf.summary.scalar('ap_score',
                          tf.reduce_mean(ap_score),
                          collections=['attr'])
        tf.summary.scalar('an_score',
                          tf.reduce_mean(an_score),
                          collections=['attr'])

        self.merged_attr = tf.summary.merge_all(key='attr')
        self.merged_rel = tf.summary.merge_all(key='rel')
        self.attr_summary_writer = tf.summary.FileWriter(
            get_tf_summary_file_path(logger) + '_attr', self.sess.graph)
        self.rel_summary_writer = tf.summary.FileWriter(
            get_tf_summary_file_path(logger) + '_rel', self.sess.graph)
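To make the scoring and loss pipeline above concrete, here is a small self-contained NumPy sketch that mirrors the [batch, 1, dimension] shapes and the reduce_mean / reduce_sum / hinge chain used in this example; calc is the assumed TransE-style score, not the project's actual _calc.

import numpy as np

np.random.seed(0)
batch, dim = 4, 10

def calc(h, t, r):
    # Assumed TransE-style per-dimension dissimilarity: |h + r - t|.
    return np.abs(h + r - t)

def margin_loss(p_score, n_score, margin=1.0):
    # Mean over samples, sum over embedding dimensions, then hinge.
    p = p_score.mean(axis=1).sum(axis=1, keepdims=True)
    n = n_score.mean(axis=1).sum(axis=1, keepdims=True)
    return np.maximum(p - n + margin, 0).sum()

h, t, r = (np.random.normal(size=(batch, 1, dim)) for _ in range(3))
nh, nt, nr = (np.random.normal(size=(batch, 1, dim)) for _ in range(3))
print(margin_loss(calc(h, t, r), calc(nh, nt, nr)))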