Example #1
    def rank(self, test_triplets, filter_mask, entry):
        assert entry in (0, 1, 2), "Entry is not in {0, 1, 2}."

        head_ids, rel_ids, tail_ids = tf_ops.split_triplet(test_triplets)

        target_emb = self.relation_embeddings if entry == 1 else self.entity_embeddings
        if entry == 0:
            rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids)
            tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids)
            predicted = self.predict_head(rel_emb, tail_emb)
            pivots = head_ids
        elif entry == 1:
            head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids)
            tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids)
            predicted = self.predict_relation(head_emb, tail_emb)
            pivots = rel_ids
        elif entry == 2:
            head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids)
            rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids)
            predicted = self.predict_tail(head_emb, rel_emb)
            pivots = tail_ids
        """A special case arises, and is also supported, where each of the input
        arrays has a degenerate dimension at a different index. In this case, 
        the result is an "outer operation": (2,1) and (1,3) broadcast to (2,3).
        For more examples, consult the Numpy documentation on broadcasting."""
        predicted = tf.expand_dims(predicted, 1)
        target_emb = tf.expand_dims(target_emb, 0)
        # Calculate distances between the predictions and all candidate embeddings
        distances = self.distance_func(target_emb, predicted, axis=-1)
        scores = self.score_func(distances)

        return self._rank_by_score_with_pivots(scores,
                                               pivots,
                                               filter_mask,
                                               ascending_order=True)
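
The two expand_dims calls above rely on the broadcasting behaviour described in the inline comment: a (batch, 1, dim) block of predictions combined with a (1, n_targets, dim) block of candidate embeddings yields a full (batch, n_targets) distance matrix. A minimal NumPy sketch of that "outer operation" pattern (the shapes and the L2 distance are illustrative assumptions, not taken from the library):

import numpy as np

batch, n_targets, dim = 2, 5, 4
predicted = np.random.rand(batch, dim)        # one prediction per test triplet
target_emb = np.random.rand(n_targets, dim)   # all candidate embeddings

# Insert degenerate dimensions at different positions ...
pred_b1d = predicted[:, None, :]              # (batch, 1, dim)
targ_1nd = target_emb[None, :, :]             # (1, n_targets, dim)

# ... so the subtraction broadcasts to (batch, n_targets, dim); reducing over
# the last axis yields one distance per (test triplet, candidate) pair.
distances = np.linalg.norm(pred_b1d - targ_1nd, axis=-1)
print(distances.shape)                        # (2, 5)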
Example #2
    def rank(self, test_triplets, filter_mask, entry):
        """
        Processes a batch of test-triplets and ranks the positive triplet's
        entry (head, relation, or tail) accordingly against all other entities
        in the datasat.

        Returns a raw rank and a filtered rank (with other correct entities
        removed, as they might rank before the positive target entry).
        """
        assert entry in (0, 1, 2), "Entry is not in {0, 1, 2}."

        head_ids, rel_ids, tail_ids = tf_ops.split_triplet(test_triplets)

        if entry == 0:
            rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids)
            tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids)
            scores = self.score_func(self.entity_embeddings,
                                     rel_emb,
                                     tail_emb,
                                     'nd',
                                     n_dim=self.dataset.n_entities,
                                     arg_right=0)
            pivots = head_ids
        elif entry == 1:
            head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids)
            tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids)
            scores = self.score_func(head_emb,
                                     self.relation_embeddings,
                                     tail_emb,
                                     'nd',
                                     n_dim=self.dataset.n_relations,
                                     arg_right=1)
            pivots = rel_ids
        elif entry == 2:
            head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids)
            rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids)
            scores = self.score_func(head_emb,
                                     rel_emb,
                                     self.entity_embeddings,
                                     'nd',
                                     n_dim=self.dataset.n_entities,
                                     arg_right=2)
            pivots = tail_ids

        return self._rank_by_score_with_pivots(scores,
                                               pivots,
                                               filter_mask,
                                               ascending_order=False)
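
The docstring above distinguishes a raw rank from a filtered rank, but `_rank_by_score_with_pivots` itself is not shown. The following NumPy sketch illustrates the usual meaning of the two ranks for a single row of scores; tie handling and the exact filtering convention are assumptions, not the library's code:

import numpy as np

def ranks_sketch(scores, pivot, filter_mask, ascending=False):
    """Raw and filtered rank of the pivot entry within one score row.

    filter_mask is True for the *other* known-correct entries that the
    filtered setting is supposed to ignore.
    """
    sign = 1.0 if ascending else -1.0
    ordered = sign * scores                  # make "smaller is better" uniform
    pivot_score = ordered[pivot]
    raw_rank = int(np.sum(ordered < pivot_score)) + 1
    competitors = ordered[~filter_mask]      # drop other true answers
    filtered_rank = int(np.sum(competitors < pivot_score)) + 1
    return raw_rank, filtered_rank

scores = np.array([0.9, 0.2, 0.7, 0.4])          # higher = better here
mask = np.array([True, False, False, False])     # index 0 is another true answer
print(ranks_sketch(scores, pivot=2, filter_mask=mask))  # (2, 1)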
Example #3
    def _generate_negative_samples(self,
                                   positive_triplets,
                                   k_negative_samples,
                                   sample_negative_relations,
                                   return_split_size=False,
                                   rel_ratio=0.5,
                                   n_relations=2):

        if sample_negative_relations:
            N_split = int(k_negative_samples / 3)
            N_r = min(N_split, int(rel_ratio * n_relations))
            N_split = int((k_negative_samples - N_r) / 2)
        else:
            N_split = int(k_negative_samples / 2)
            N_r = None

        head_ids, rel_ids, tail_ids = tf_ops.split_triplet(positive_triplets)

        # Sample K negative ("corrupted") samples per positive sample
        neg_head_ids = eknog.tf_ops.negative_sampling_uniform(
            positive_triplets, N_split, self.dataset.n_entities)
        if sample_negative_relations:
            if self.dataset.n_relations < 0.3 * N_split:
                neg_rel_ids = eknog.tf_ops.negative_sampling_uniform_with_exclusion(
                    rel_ids, N_r, self.dataset.n_relations)
            else:
                # This is much cheaper; when the number of relations is
                # large, use this variant instead.
                neg_rel_ids = eknog.tf_ops.negative_sampling_uniform(
                    positive_triplets, N_r, self.dataset.n_relations)
        else:
            neg_rel_ids = None
        neg_tail_ids = eknog.tf_ops.negative_sampling_uniform(
            positive_triplets, N_split, self.dataset.n_entities)

        if not return_split_size:
            return neg_head_ids, neg_rel_ids, neg_tail_ids
        else:
            return neg_head_ids, neg_rel_ids, neg_tail_ids, N_split, N_r
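
The branch at the top of this method splits the negative-sample budget between head, relation, and tail corruptions. A worked run of the same arithmetic with illustrative numbers (k, the ratio, and the relation count below are made up for the example):

# Same split arithmetic as above, with concrete numbers.
k_negative_samples, rel_ratio, n_relations = 30, 0.5, 10

N_split = int(k_negative_samples / 3)                # 10 tentative per position
N_r = min(N_split, int(rel_ratio * n_relations))     # 5 relation corruptions
N_split = int((k_negative_samples - N_r) / 2)        # 12 head and 12 tail corruptions

print(N_r, N_split)   # 5 12  -> 12 + 5 + 12 = 29 negatives per positive triplet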
Example #4
    def _lookup_test_embs(self,
                          test_triplets,
                          test_targets,
                          entry,
                          entity_var,
                          relation_var,
                          expand_dims=True):
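        """
        Looks up head, relation, and tail embeddings for a batch of test
        triplets, substituting test_targets for the entry that is being
        predicted (entry: 0 = head, 1 = relation, 2 = tail).
        """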
        head_ids, rel_ids, tail_ids = tf_ops.split_triplet(test_triplets)

        cargs = {"expand_dim": 1} if expand_dims else {}
        if entry == 0:  # Predict heads
            head_emb = tf_ops.emb_lookup(entity_var, test_targets)
            rel_emb = tf_ops.emb_lookup(relation_var, rel_ids, **cargs)
            tail_emb = tf_ops.emb_lookup(entity_var, tail_ids, **cargs)
        elif entry == 1:  # Predict relations
            head_emb = tf_ops.emb_lookup(entity_var, head_ids, **cargs)
            rel_emb = tf_ops.emb_lookup(relation_var, test_targets)
            tail_emb = tf_ops.emb_lookup(entity_var, tail_ids, **cargs)
        else:  # Predict tails
            head_emb = tf_ops.emb_lookup(entity_var, head_ids, **cargs)
            rel_emb = tf_ops.emb_lookup(relation_var, rel_ids, **cargs)
            tail_emb = tf_ops.emb_lookup(entity_var, test_targets)

        return head_emb, rel_emb, tail_emb
Example #5
    def _standard_loss(self, positive_triplets):
        """
        Returns a loss variable which can be optimized.
        :param positive_triplets: A batch (2D Tensor) of positive triplets.
        """
        energies = []
        pos_scores = []
        neg_scores = []

        head_ids, rel_ids, tail_ids = tf_ops.split_triplet(positive_triplets)

        with tf.name_scope('positive_score') as scope:
            # Get scores for positive test examples (Y_rso==1)
            cargs = {"dropout": 1 - self.config["emb_dropout"]}

            pos_head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids,
                                             **cargs)
            pos_rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids,
                                            **cargs)
            pos_tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids,
                                             **cargs)

            positive_scores = self.score_func(pos_head_emb, pos_rel_emb,
                                              pos_tail_emb, 'bd')
            pos_scores.append(positive_scores)

        with tf.name_scope('negative_sampling') as scope:
            neg_head_ids, neg_rel_ids, neg_tail_ids, n_dim, n_r = self._generate_negative_samples(
                positive_triplets,
                self.config["k_negative_samples"],
                self.config["sample_negative_relations"],
                return_split_size=True,
                n_relations=self.dataset.n_relations,
                rel_ratio=self.config["neg_rel_ratio"])

        with tf.name_scope('negative_emb_lookup') as scope:
            cargs = {"dropout": 1 - self.config["emb_dropout"]}
            neg_head_emb = tf_ops.emb_lookup(self.entity_embeddings,
                                             neg_head_ids, **cargs)
            neg_tail_emb = tf_ops.emb_lookup(self.entity_embeddings,
                                             neg_tail_ids, **cargs)

            if self.config["sample_negative_relations"]:
                neg_rel_emb = tf_ops.emb_lookup(self.relation_embeddings,
                                                neg_rel_ids, **cargs)

        with tf.name_scope('negative_scores') as scope:
            score_hcr_t = self.score_func(neg_head_emb,
                                          pos_rel_emb,
                                          pos_tail_emb,
                                          'bnd',
                                          n_dim=n_dim,
                                          arg_right=0)
            score_hr_tc = self.score_func(pos_head_emb,
                                          pos_rel_emb,
                                          neg_tail_emb,
                                          'bnd',
                                          n_dim=n_dim,
                                          arg_right=2)
            neg_scores.append(score_hcr_t)
            neg_scores.append(score_hr_tc)

            if self.config["sample_negative_relations"]:
                score_hrc_t = self.score_func(pos_head_emb,
                                              neg_rel_emb,
                                              pos_tail_emb,
                                              'bnd',
                                              n_dim=n_r,
                                              arg_right=1)
                neg_scores.append(score_hrc_t)

        if self.config["loss_type"] in ["softplus"]:
            # pos_scores = tf.concat(pos_scores, axis=-1)
            neg_scores = tf.concat(neg_scores, axis=-1)
            energies.append((positive_scores, neg_scores))
        else:
            for ns in neg_scores:
                energies.append((positive_scores, ns))

        return self._loss_out(energies)
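
The (positive, negative) score pairs collected in energies are handed to self._loss_out, which is not shown here. As a rough illustration only, this is how such pairs are commonly turned into a margin-ranking or softplus loss in knowledge-graph embedding models; the margin value and both formulas are assumptions, not this library's _loss_out:

import numpy as np

def softplus(x):
    # Numerically stable softplus: log(1 + exp(x)).
    return np.maximum(x, 0.0) + np.log1p(np.exp(-np.abs(x)))

# One (positive, negative) pair, shaped like the entries appended to energies.
pos = np.array([[2.1]])              # (batch, 1) positive scores
neg = np.array([[0.3, -0.5, 1.9]])   # (batch, k) negative scores

# Margin reading: every negative should trail the positive by a margin.
margin = 1.0
margin_loss = np.maximum(0.0, margin + neg - pos).mean()

# Softplus reading: push positive scores up and negative scores down.
softplus_loss = softplus(-pos).mean() + softplus(neg).mean()

print(margin_loss, softplus_loss)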
Example #6
    def loss(self, positive_triplets):
        """
        Returns a loss variable which can be optimized.
        :param positive_triplets: A batch (2D Tensor) of positive triplets.
        """
        energies = []
        norm_axis = -1 if self.config["normalize"] else None

        head_ids, rel_ids, tail_ids = tf_ops.split_triplet(positive_triplets)

        with tf.name_scope('negative_sampling') as scope:
            neg_head_ids, neg_rel_ids, neg_tail_ids = self._generate_negative_samples(
                positive_triplets,
                self.config["k_negative_samples"],
                self.config["sample_negative_relations"],
                rel_ratio=self.config["neg_rel_ratio"])

        with tf.name_scope('positive_sample_embeddings') as scope:
            pos_head_emb = tf_ops.emb_lookup(self.entity_embeddings,
                                             head_ids,
                                             normalize_axis=norm_axis,
                                             expand_dim=1,
                                             dropout=1 -
                                             self.config["emb_dropout"])
            pos_rel_emb = tf_ops.emb_lookup(self.relation_embeddings,
                                            rel_ids,
                                            expand_dim=1,
                                            dropout=1 -
                                            self.config["emb_dropout"])
            pos_tail_emb = tf_ops.emb_lookup(self.entity_embeddings,
                                             tail_ids,
                                             normalize_axis=norm_axis,
                                             expand_dim=1,
                                             dropout=1 -
                                             self.config["emb_dropout"])

        with tf.name_scope('negative_sample_embeddings') as scope:
            neg_head_emb = tf_ops.emb_lookup(self.entity_embeddings,
                                             neg_head_ids,
                                             normalize_axis=norm_axis,
                                             dropout=1 -
                                             self.config["emb_dropout"])
            neg_tail_emb = tf_ops.emb_lookup(self.entity_embeddings,
                                             neg_tail_ids,
                                             normalize_axis=norm_axis,
                                             dropout=1 -
                                             self.config["emb_dropout"])
            if self.config["sample_negative_relations"]:
                neg_rel_emb = tf_ops.emb_lookup(self.relation_embeddings,
                                                neg_rel_ids,
                                                dropout=1 -
                                                self.config["emb_dropout"])

        with tf.name_scope('predictions') as scope:
            hr_t_emb = self.predict_tail(pos_head_emb, pos_rel_emb)
            hcr_t_emb = self.predict_tail(neg_head_emb, pos_rel_emb)
            if self.config["sample_negative_relations"]:
                hrc_t_emb = self.predict_tail(pos_head_emb, neg_rel_emb)

        with tf.name_scope('distances') as scope:
            dist_hr_t = self.distance_func(hr_t_emb, pos_tail_emb, axis=-1)
            dist_hcr_t = self.distance_func(hcr_t_emb, pos_tail_emb, axis=-1)
            dist_hr_tc = self.distance_func(hr_t_emb, neg_tail_emb, axis=-1)
            if self.config["sample_negative_relations"]:
                dist_hrc_t = self.distance_func(hrc_t_emb,
                                                pos_tail_emb,
                                                axis=-1)

        with tf.name_scope('scores') as scope:
            score_hr_t = self.score_func(dist_hr_t)
            score_hcr_t = self.score_func(dist_hcr_t)
            score_hr_tc = self.score_func(dist_hr_tc)
            if self.config["sample_negative_relations"]:
                score_hrc_t = self.score_func(dist_hrc_t)

        with tf.name_scope('energies') as scope:
            energies.append((-1 * score_hr_t, -1 * score_hcr_t))
            energies.append((-1 * score_hr_t, -1 * score_hr_tc))
            if self.config["sample_negative_relations"]:
                energies.append((-1 * score_hr_t, -1 * score_hrc_t))

        return self._loss_out(energies)
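
The positive embeddings in this method are looked up with expand_dim=1, so a (batch, 1, dim) positive block broadcasts against a (batch, k, dim) block of corrupted negatives inside distance_func. A generic translational sketch of that shape flow; the library's actual predict_tail and distance_func are not shown, and "head + relation" with an L2 distance is only an assumption:

import numpy as np

def predict_tail(head_emb, rel_emb):
    # Translational prediction: the tail is approximated by head + relation.
    return head_emb + rel_emb

def distance_func(a, b, axis=-1):
    # L2 distance along the embedding dimension.
    return np.linalg.norm(a - b, axis=axis)

pos_head = np.random.rand(3, 1, 8)   # (batch, 1, dim), as after expand_dim=1
pos_rel = np.random.rand(3, 1, 8)
neg_tail = np.random.rand(3, 5, 8)   # (batch, k, dim) corrupted tails

# Broadcasting yields one distance per (positive triplet, corrupted tail) pair.
dist_hr_tc = distance_func(predict_tail(pos_head, pos_rel), neg_tail)
print(dist_hr_tc.shape)              # (3, 5)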
Example #7
    def loss(self, positive_triplets):
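        """
        Returns a loss variable which can be optimized.
        :param positive_triplets: A batch (2D Tensor) of positive triplets.
        """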

        energies = []
        pos_scores = []
        neg_scores = []

        head_ids, rel_ids, tail_ids = tf_ops.split_triplet(positive_triplets)

        with tf.name_scope('positive_emb_lookup') as scope:
            # Get scores for positive test examples (Y_rso==1)
            pos_head_emb_real = tf_ops.emb_lookup(self.entity_embeddings_real,
                                                  head_ids)
            pos_head_emb_imag = tf_ops.emb_lookup(self.entity_embeddings_imag,
                                                  head_ids)
            pos_rel_emb_real = tf_ops.emb_lookup(self.relation_embeddings_real,
                                                 rel_ids)
            pos_rel_emb_imag = tf_ops.emb_lookup(self.relation_embeddings_imag,
                                                 rel_ids)
            pos_tail_emb_real = tf_ops.emb_lookup(self.entity_embeddings_real,
                                                  tail_ids)
            pos_tail_emb_imag = tf_ops.emb_lookup(self.entity_embeddings_imag,
                                                  tail_ids)

            pos_head_emb_real, pos_head_emb_imag = tf_ops.complex_dropout(
                pos_head_emb_real, pos_head_emb_imag,
                self.config["emb_dropout"])
            pos_rel_emb_real, pos_rel_emb_imag = tf_ops.complex_dropout(
                pos_rel_emb_real, pos_rel_emb_imag, self.config["emb_dropout"])
            pos_tail_emb_real, pos_tail_emb_imag = tf_ops.complex_dropout(
                pos_tail_emb_real, pos_tail_emb_imag,
                self.config["emb_dropout"])

        with tf.name_scope('positive_score') as scope:
            # PAY ATTENTION TO THE DIFFERING ORDER RELATION->HEAD->TAIL!
            positive_scores = self.score_func(
                pos_rel_emb_real, pos_rel_emb_imag, pos_head_emb_real,
                pos_head_emb_imag, pos_tail_emb_real, pos_tail_emb_imag, 'bd')
            pos_scores.append(positive_scores)

        with tf.name_scope('negative_sampling') as scope:
            neg_head_ids, neg_rel_ids, neg_tail_ids, n_dim, n_r = self._generate_negative_samples(
                positive_triplets,
                self.config["k_negative_samples"],
                self.config["sample_negative_relations"],
                return_split_size=True,
                n_relations=self.dataset.n_relations,
                rel_ratio=self.config["neg_rel_ratio"])

        with tf.name_scope('negative_emb_lookup') as scope:
            neg_head_emb_real = tf_ops.emb_lookup(self.entity_embeddings_real,
                                                  neg_head_ids)
            neg_head_emb_imag = tf_ops.emb_lookup(self.entity_embeddings_imag,
                                                  neg_head_ids)
            neg_tail_emb_real = tf_ops.emb_lookup(self.entity_embeddings_real,
                                                  neg_tail_ids)
            neg_tail_emb_imag = tf_ops.emb_lookup(self.entity_embeddings_imag,
                                                  neg_tail_ids)

            neg_head_emb_real, neg_head_emb_imag = tf_ops.complex_dropout(
                neg_head_emb_real, neg_head_emb_imag,
                self.config["emb_dropout"])
            neg_tail_emb_real, neg_tail_emb_imag = tf_ops.complex_dropout(
                neg_tail_emb_real, neg_tail_emb_imag,
                self.config["emb_dropout"])

        with tf.name_scope('negative_scores') as scope:
            negative_scores_head = self.score_func(pos_rel_emb_real,
                                                   pos_rel_emb_imag,
                                                   neg_head_emb_real,
                                                   neg_head_emb_imag,
                                                   pos_tail_emb_real,
                                                   pos_tail_emb_imag,
                                                   'bnd',
                                                   n_dim=n_dim,
                                                   arg_right=1)
            neg_scores.append(negative_scores_head)

            negative_scores_tail = self.score_func(pos_rel_emb_real,
                                                   pos_rel_emb_imag,
                                                   pos_head_emb_real,
                                                   pos_head_emb_imag,
                                                   neg_tail_emb_real,
                                                   neg_tail_emb_imag,
                                                   'bnd',
                                                   n_dim=n_dim,
                                                   arg_right=2)
            neg_scores.append(negative_scores_tail)

            if self.config["sample_negative_relations"]:
                neg_rel_emb_real = tf_ops.emb_lookup(
                    self.relation_embeddings_real, neg_rel_ids)
                neg_rel_emb_imag = tf_ops.emb_lookup(
                    self.relation_embeddings_imag, neg_rel_ids)

                neg_rel_emb_real, neg_rel_emb_imag = tf_ops.complex_dropout(
                    neg_rel_emb_real, neg_rel_emb_imag,
                    self.config["emb_dropout"])

                negative_scores_rel = self.score_func(neg_rel_emb_real,
                                                      neg_rel_emb_imag,
                                                      pos_head_emb_real,
                                                      pos_head_emb_imag,
                                                      pos_tail_emb_real,
                                                      pos_tail_emb_imag,
                                                      'bnd',
                                                      n_dim=n_r,
                                                      arg_right=0)
                neg_scores.append(negative_scores_rel)

        if self.config["loss_type"] in ["softplus"]:
            # pos_scores = tf.concat(pos_scores, axis=-1)
            neg_scores = tf.concat(neg_scores, axis=-1)
            energies.append((positive_scores, neg_scores))
        else:
            for ns in neg_scores:
                energies.append((positive_scores, ns))

        return self._loss_out(energies)
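
The separate real and imaginary embedding tables and the relation -> head -> tail argument order are characteristic of a ComplEx-style model. For reference, a sketch of the published ComplEx score Re(<r, h, conj(t)>) in that argument order; the library's actual score_func and its 'bd'/'bnd' modes are not shown, so treat this only as the underlying formula:

import numpy as np

def complex_trilinear_score(r_re, r_im, h_re, h_im, t_re, t_im):
    # Re(<r, h, conj(t)>), summed over the embedding dimension.
    return np.sum(
        r_re * h_re * t_re
        + r_re * h_im * t_im
        + r_im * h_re * t_im
        - r_im * h_im * t_re,
        axis=-1)

batch, dim = 3, 8
args = [np.random.rand(batch, dim) for _ in range(6)]
print(complex_trilinear_score(*args).shape)   # (3,)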
Example #8
    def loss(self, positive_triplets):
        """
        Returns a loss variable which can be optimized.
        :param positive_triplets: A batch (2D Tensor) of positive triplets.
        """
        energies = []

        head_ids, rel_ids, tail_ids = tf_ops.split_triplet(positive_triplets)

        with tf.name_scope('negative_sampling') as scope:
            neg_head_ids, neg_rel_ids, neg_tail_ids = self._generate_negative_samples(
                positive_triplets, self.config["k_negative_samples"],
                self.config["sample_negative_relations"])

        project_entities = tf.assign(
            self.entity_embeddings,
            tf_ops.project_to_unit_tensor(self.entity_embeddings))
        project_relations = tf.assign(
            self.relation_embeddings,
            tf_ops.project_to_unit_tensor(self.relation_embeddings))

        with tf.control_dependencies([project_entities, project_relations]):
            with tf.name_scope('positive_sample_embeddings') as scope:
                pos_head_emb = tf_ops.emb_lookup(self.entity_embeddings,
                                                 head_ids,
                                                 expand_dim=1)
                pos_rel_emb = tf_ops.emb_lookup(self.relation_embeddings,
                                                rel_ids,
                                                expand_dim=1)
                pos_tail_emb = tf_ops.emb_lookup(self.entity_embeddings,
                                                 tail_ids,
                                                 expand_dim=1)

            with tf.name_scope('negative_sample_embeddings') as scope:
                neg_head_emb = tf_ops.emb_lookup(self.entity_embeddings,
                                                 neg_head_ids)
                neg_tail_emb = tf_ops.emb_lookup(self.entity_embeddings,
                                                 neg_tail_ids)
                if self.config["sample_negative_relations"]:
                    neg_rel_emb = tf_ops.emb_lookup(self.relation_embeddings,
                                                    neg_rel_ids)

            with tf.name_scope('predictions') as scope:
                hr_t_emb = self.predict_tail(pos_head_emb, pos_rel_emb)
                hcr_t_emb = self.predict_tail(neg_head_emb, pos_rel_emb)
                if self.config["sample_negative_relations"]:
                    hrc_t_emb = self.predict_tail(pos_head_emb, neg_rel_emb)

            with tf.name_scope('distances') as scope:
                dist_hr_t = self.distance_func(hr_t_emb, pos_tail_emb, axis=-1)
                dist_hcr_t = self.distance_func(hcr_t_emb,
                                                pos_tail_emb,
                                                axis=-1)
                dist_hr_tc = self.distance_func(hr_t_emb,
                                                neg_tail_emb,
                                                axis=-1)
                if self.config["sample_negative_relations"]:
                    dist_hrc_t = self.distance_func(hrc_t_emb,
                                                    pos_tail_emb,
                                                    axis=-1)

            with tf.name_scope('scores') as scope:
                score_hr_t = self.score_func(dist_hr_t)
                score_hcr_t = self.score_func(dist_hcr_t)
                score_hr_tc = self.score_func(dist_hr_tc)
                if self.config["sample_negative_relations"]:
                    score_hrc_t = self.score_func(dist_hrc_t)

            with tf.name_scope('energies') as scope:
                energies.append((score_hr_t, score_hcr_t))
                energies.append((score_hr_t, score_hr_tc))
                if self.config["sample_negative_relations"]:
                    energies.append((score_hr_t, score_hrc_t))

        if self.config["loss_type"] in ("softmax", "softplus"):
            neng = []
            for pos, neg in energies:
                neng.append((-1 * pos, -1 * neg))
            return self._loss_out(neng)
        else:
            return self._loss_out(energies)
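
Before the embedding lookups, this method re-projects both embedding tables and makes every lookup depend on those assignments via tf.control_dependencies. A small NumPy sketch of what a row-wise unit-norm projection does; the real tf_ops.project_to_unit_tensor may differ (for instance, it could clip norms instead of always rescaling):

import numpy as np

def project_to_unit_rows(table, eps=1e-12):
    # Rescale every embedding row to unit L2 norm.
    norms = np.linalg.norm(table, axis=-1, keepdims=True)
    return table / np.maximum(norms, eps)

table = np.random.rand(5, 8) * 3.0            # (n_embeddings, dim)
projected = project_to_unit_rows(table)
print(np.allclose(np.linalg.norm(projected, axis=-1), 1.0))   # True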