def rank(self, test_triplets, filter_mask, entry):
    assert entry in (0, 1, 2), "Entry is not in {0, 1, 2}."

    head_ids, rel_ids, tail_ids = tf_ops.split_triplet(test_triplets)

    target_emb = self.relation_embeddings if entry == 1 else self.entity_embeddings

    if entry == 0:
        rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids)
        tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids)
        predicted = self.predict_head(rel_emb, tail_emb)
        pivots = head_ids
    elif entry == 1:
        head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids)
        tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids)
        predicted = self.predict_relation(head_emb, tail_emb)
        pivots = rel_ids
    elif entry == 2:
        head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids)
        rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids)
        predicted = self.predict_tail(head_emb, rel_emb)
        pivots = tail_ids

    # Give each operand a degenerate dimension at a different index, so the
    # result is an "outer" operation: (2, 1) and (1, 3) broadcast to (2, 3).
    # For more examples, consult the NumPy documentation on broadcasting.
    predicted = tf.expand_dims(predicted, 1)
    target_emb = tf.expand_dims(target_emb, 0)

    # Calculate distance of every prediction to every candidate embedding
    distances = self.distance_func(target_emb, predicted, axis=-1)
    scores = self.score_func(distances)

    return self._rank_by_score_with_pivots(scores, pivots, filter_mask,
                                           ascending_order=True)
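
# --- Illustration: the outer broadcast used above (a standalone NumPy
# sketch; the shapes and the L2 distance are assumptions for illustration).
import numpy as np

B, N, D = 4, 10, 8                       # batch, candidates, emb. dimension
predicted = np.random.rand(B, D)         # one prediction per test triplet
targets = np.random.rand(N, D)           # all candidate target embeddings

# (B, 1, D) against (1, N, D) broadcasts to (B, N, D); reducing over the
# last axis yields one distance per (test triplet, candidate) pair.
distances = np.linalg.norm(predicted[:, None, :] - targets[None, :, :],
                           axis=-1)
assert distances.shape == (B, N)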
def rank(self, test_triplets, filter_mask, entry): """ Processes a batch of test-triplets and ranks the positive triplet's entry (head, relation, or tail) accordingly against all other entities in the datasat. Returns a raw rank and a filtered rank (with other correct entities removed, as they might rank before the positive target entry). """ assert entry == 0 or entry == 1 or entry == 2, "Entry is not {0,1,2}." head_ids, rel_ids, tail_ids = tf_ops.split_triplet(test_triplets) if entry == 0: rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids) tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids) scores = self.score_func(self.entity_embeddings, rel_emb, tail_emb, 'nd', n_dim=self.dataset.n_entities, arg_right=0) pivots = head_ids elif entry == 1: head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids) tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids) scores = self.score_func(head_emb, self.relation_embeddings, tail_emb, 'nd', n_dim=self.dataset.n_relations, arg_right=1) pivots = rel_ids elif entry == 2: head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids) rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids) scores = self.score_func(head_emb, rel_emb, self.entity_embeddings, 'nd', n_dim=self.dataset.n_entities, arg_right=2) pivots = tail_ids return self._rank_by_score_with_pivots(scores, pivots, filter_mask, ascending_order=False)
def _generate_negative_samples(self, positive_triplets, k_negative_samples,
                               sample_negative_relations,
                               return_split_size=False, rel_ratio=0.5,
                               n_relations=2):
    if sample_negative_relations:
        N_split = int(k_negative_samples / 3)
        N_r = min(N_split, int(rel_ratio * n_relations))
        N_split = int((k_negative_samples - N_r) / 2)
    else:
        N_split = int(k_negative_samples / 2)
        N_r = None

    head_ids, rel_ids, tail_ids = tf_ops.split_triplet(positive_triplets)

    # Sample K negative ("corrupted") samples per positive sample
    neg_head_ids = eknog.tf_ops.negative_sampling_uniform(
        positive_triplets, N_split, self.dataset.n_entities)

    if sample_negative_relations:
        if self.dataset.n_relations < 0.3 * N_split:
            neg_rel_ids = eknog.tf_ops.negative_sampling_uniform_with_exclusion(
                rel_ids, N_r, self.dataset.n_relations)
        else:
            # When the number of relations is large, a collision with the
            # true relation is unlikely, and plain uniform sampling is much
            # cheaper than sampling with exclusion.
            neg_rel_ids = eknog.tf_ops.negative_sampling_uniform(
                positive_triplets, N_r, self.dataset.n_relations)
    else:
        neg_rel_ids = None

    neg_tail_ids = eknog.tf_ops.negative_sampling_uniform(
        positive_triplets, N_split, self.dataset.n_entities)

    if not return_split_size:
        return neg_head_ids, neg_rel_ids, neg_tail_ids
    return neg_head_ids, neg_rel_ids, neg_tail_ids, N_split, N_r
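
# --- Illustration: uniform negative sampling with exclusion can be done
# without rejection sampling. This is a hypothetical NumPy sketch of the
# idea, not the repo's negative_sampling_uniform_with_exclusion.
import numpy as np

def sample_excluding(true_ids, k, n):
    # Draw from {0, ..., n-2}, then shift every draw >= the true id up by
    # one: a uniform sample over the n-1 ids different from true_ids[i].
    samples = np.random.randint(0, n - 1, size=(len(true_ids), k))
    samples += (samples >= true_ids[:, None])
    return samples

neg = sample_excluding(np.array([0, 3, 7]), k=5, n=8)
assert not np.any(neg == np.array([0, 3, 7])[:, None])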
def _lookup_test_embs(self, test_triplets, test_targets, entry,
                      entity_var, relation_var, expand_dims=True):
    head_ids, rel_ids, tail_ids = tf_ops.split_triplet(test_triplets)

    # The two fixed entries get an extra broadcast axis; the target entry
    # is looked up for all candidates without one.
    cargs = {"expand_dim": 1} if expand_dims else {}

    if entry == 0:
        # Predict heads
        head_emb = tf_ops.emb_lookup(entity_var, test_targets)
        rel_emb = tf_ops.emb_lookup(relation_var, rel_ids, **cargs)
        tail_emb = tf_ops.emb_lookup(entity_var, tail_ids, **cargs)
    elif entry == 1:
        # Predict relations
        head_emb = tf_ops.emb_lookup(entity_var, head_ids, **cargs)
        rel_emb = tf_ops.emb_lookup(relation_var, test_targets)
        tail_emb = tf_ops.emb_lookup(entity_var, tail_ids, **cargs)
    else:
        # Predict tails
        head_emb = tf_ops.emb_lookup(entity_var, head_ids, **cargs)
        rel_emb = tf_ops.emb_lookup(relation_var, rel_ids, **cargs)
        tail_emb = tf_ops.emb_lookup(entity_var, test_targets)

    return head_emb, rel_emb, tail_emb
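
# --- Illustration: the shape effect of expand_dim=1 during evaluation
# (NumPy sketch; the elementwise trilinear score below is only a stand-in
# for whatever score_func the model actually uses).
import numpy as np

B, N, D = 4, 10, 8
head_emb = np.random.rand(B, 1, D)   # fixed entry, looked up with expand_dim=1
rel_emb = np.random.rand(B, 1, D)    # fixed entry, looked up with expand_dim=1
tail_emb = np.random.rand(N, D)      # target entry: all N candidates

# (B, 1, D) * (B, 1, D) * (N, D) broadcasts to (B, N, D), so one score per
# (test triplet, candidate) pair falls out of a reduction over axis -1.
scores = np.sum(head_emb * rel_emb * tail_emb, axis=-1)
assert scores.shape == (B, N)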
def _standard_loss(self, positive_triplets):
    """
    Returns a loss variable which can be optimized.

    :param positive_triplets: A batch (2D Tensor) of positive triplets.
    """
    energies = []
    pos_scores = []
    neg_scores = []

    head_ids, rel_ids, tail_ids = tf_ops.split_triplet(positive_triplets)

    with tf.name_scope('positive_score') as scope:
        # Get scores for positive test examples (Y_rso == 1)
        cargs = {"dropout": 1 - self.config["emb_dropout"]}
        pos_head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids, **cargs)
        pos_rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids, **cargs)
        pos_tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids, **cargs)

        positive_scores = self.score_func(pos_head_emb, pos_rel_emb,
                                          pos_tail_emb, 'bd')
        pos_scores.append(positive_scores)

    with tf.name_scope('negative_sampling') as scope:
        neg_head_ids, neg_rel_ids, neg_tail_ids, n_dim, n_r = \
            self._generate_negative_samples(
                positive_triplets,
                self.config["k_negative_samples"],
                self.config["sample_negative_relations"],
                return_split_size=True,
                n_relations=self.dataset.n_relations,
                rel_ratio=self.config["neg_rel_ratio"])

    with tf.name_scope('negative_emb_lookup') as scope:
        cargs = {"dropout": 1 - self.config["emb_dropout"]}
        neg_head_emb = tf_ops.emb_lookup(self.entity_embeddings, neg_head_ids, **cargs)
        neg_tail_emb = tf_ops.emb_lookup(self.entity_embeddings, neg_tail_ids, **cargs)
        if self.config["sample_negative_relations"]:
            neg_rel_emb = tf_ops.emb_lookup(self.relation_embeddings, neg_rel_ids, **cargs)

    with tf.name_scope('negative_scores') as scope:
        score_hcr_t = self.score_func(neg_head_emb, pos_rel_emb, pos_tail_emb,
                                      'bnd', n_dim=n_dim, arg_right=0)
        score_hr_tc = self.score_func(pos_head_emb, pos_rel_emb, neg_tail_emb,
                                      'bnd', n_dim=n_dim, arg_right=2)
        neg_scores.append(score_hcr_t)
        neg_scores.append(score_hr_tc)

        if self.config["sample_negative_relations"]:
            score_hrc_t = self.score_func(pos_head_emb, neg_rel_emb,
                                          pos_tail_emb, 'bnd', n_dim=n_r,
                                          arg_right=1)
            neg_scores.append(score_hrc_t)

    if self.config["loss_type"] in ["softplus"]:
        # pos_scores = tf.concat(pos_scores, axis=-1)
        neg_scores = tf.concat(neg_scores, axis=-1)
        energies.append((positive_scores, neg_scores))
    else:
        for ns in neg_scores:
            energies.append((positive_scores, ns))

    return self._loss_out(energies)
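
# --- Illustration: a minimal sketch of how the (positive, negative) score
# pairs collected above could be turned into a loss. The margin and
# softplus forms below are assumptions, not the repo's _loss_out.
import numpy as np

def loss_out_sketch(energies, loss_type="margin", margin=1.0):
    total = 0.0
    for pos, neg in energies:              # pos: (B, 1), neg: (B, K)
        if loss_type == "margin":
            # Hinge on the score gap: push pos above every neg by `margin`.
            total += np.maximum(0.0, margin - pos + neg).mean()
        else:                              # softplus: pos labeled +1, neg -1
            total += np.log1p(np.exp(-pos)).mean()
            total += np.log1p(np.exp(neg)).mean()
    return total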
def loss(self, positive_triplets): """ Returns a loss variable which can be optimized. :param positive_triplets: A batch (2D Tensor) of positive triplets. """ energies = [] norm_axis = -1 if self.config["normalize"] else None head_ids, rel_ids, tail_ids = tf_ops.split_triplet(positive_triplets) with tf.name_scope('negative_sampling') as scope: neg_head_ids, neg_rel_ids, neg_tail_ids = self._generate_negative_samples( positive_triplets, self.config["k_negative_samples"], self.config["sample_negative_relations"], rel_ratio=self.config["neg_rel_ratio"]) with tf.name_scope('positive_sample_embeddings') as scope: pos_head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids, normalize_axis=norm_axis, expand_dim=1, dropout=1 - self.config["emb_dropout"]) pos_rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids, expand_dim=1, dropout=1 - self.config["emb_dropout"]) pos_tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids, normalize_axis=norm_axis, expand_dim=1, dropout=1 - self.config["emb_dropout"]) with tf.name_scope('negative_sample_embeddings') as scope: neg_head_emb = tf_ops.emb_lookup(self.entity_embeddings, neg_head_ids, normalize_axis=norm_axis, dropout=1 - self.config["emb_dropout"]) neg_tail_emb = tf_ops.emb_lookup(self.entity_embeddings, neg_tail_ids, normalize_axis=norm_axis, dropout=1 - self.config["emb_dropout"]) if self.config["sample_negative_relations"]: neg_rel_emb = tf_ops.emb_lookup(self.relation_embeddings, neg_rel_ids, dropout=1 - self.config["emb_dropout"]) with tf.name_scope('predictions') as scope: hr_t_emb = self.predict_tail(pos_head_emb, pos_rel_emb) hcr_t_emb = self.predict_tail(neg_head_emb, pos_rel_emb) if self.config["sample_negative_relations"]: hrc_t_emb = self.predict_tail(pos_head_emb, neg_rel_emb) with tf.name_scope('distances') as scope: dist_hr_t = self.distance_func(hr_t_emb, pos_tail_emb, axis=-1) dist_hcr_t = self.distance_func(hcr_t_emb, pos_tail_emb, axis=-1) dist_hr_tc = self.distance_func(hr_t_emb, neg_tail_emb, axis=-1) if self.config["sample_negative_relations"]: dist_hrc_t = self.distance_func(hrc_t_emb, pos_tail_emb, axis=-1) with tf.name_scope('scores') as scope: score_hr_t = self.score_func(dist_hr_t) score_hcr_t = self.score_func(dist_hcr_t) score_hr_tc = self.score_func(dist_hr_tc) if self.config["sample_negative_relations"]: score_hrc_t = self.score_func(dist_hrc_t) with tf.name_scope('energies') as scope: energies.append((-1 * score_hr_t, -1 * score_hcr_t)) energies.append((-1 * score_hr_t, -1 * score_hr_tc)) if self.config["sample_negative_relations"]: energies.append((-1 * score_hr_t, -1 * score_hrc_t)) return self._loss_out(energies)
def loss(self, positive_triplets):
    energies = []
    pos_scores = []
    neg_scores = []

    head_ids, rel_ids, tail_ids = tf_ops.split_triplet(positive_triplets)

    with tf.name_scope('positive_emb_lookup') as scope:
        # Get scores for positive test examples (Y_rso == 1)
        pos_head_emb_real = tf_ops.emb_lookup(self.entity_embeddings_real, head_ids)
        pos_head_emb_imag = tf_ops.emb_lookup(self.entity_embeddings_imag, head_ids)
        pos_rel_emb_real = tf_ops.emb_lookup(self.relation_embeddings_real, rel_ids)
        pos_rel_emb_imag = tf_ops.emb_lookup(self.relation_embeddings_imag, rel_ids)
        pos_tail_emb_real = tf_ops.emb_lookup(self.entity_embeddings_real, tail_ids)
        pos_tail_emb_imag = tf_ops.emb_lookup(self.entity_embeddings_imag, tail_ids)

        pos_head_emb_real, pos_head_emb_imag = tf_ops.complex_dropout(
            pos_head_emb_real, pos_head_emb_imag, self.config["emb_dropout"])
        pos_rel_emb_real, pos_rel_emb_imag = tf_ops.complex_dropout(
            pos_rel_emb_real, pos_rel_emb_imag, self.config["emb_dropout"])
        pos_tail_emb_real, pos_tail_emb_imag = tf_ops.complex_dropout(
            pos_tail_emb_real, pos_tail_emb_imag, self.config["emb_dropout"])

    with tf.name_scope('positive_score') as scope:
        # PAY ATTENTION TO THE DIFFERING ORDER RELATION -> HEAD -> TAIL!
        positive_scores = self.score_func(
            pos_rel_emb_real, pos_rel_emb_imag,
            pos_head_emb_real, pos_head_emb_imag,
            pos_tail_emb_real, pos_tail_emb_imag, 'bd')
        pos_scores.append(positive_scores)

    with tf.name_scope('negative_sampling') as scope:
        neg_head_ids, neg_rel_ids, neg_tail_ids, n_dim, n_r = \
            self._generate_negative_samples(
                positive_triplets,
                self.config["k_negative_samples"],
                self.config["sample_negative_relations"],
                return_split_size=True,
                n_relations=self.dataset.n_relations,
                rel_ratio=self.config["neg_rel_ratio"])

    with tf.name_scope('negative_emb_lookup') as scope:
        neg_head_emb_real = tf_ops.emb_lookup(self.entity_embeddings_real, neg_head_ids)
        neg_head_emb_imag = tf_ops.emb_lookup(self.entity_embeddings_imag, neg_head_ids)
        neg_tail_emb_real = tf_ops.emb_lookup(self.entity_embeddings_real, neg_tail_ids)
        neg_tail_emb_imag = tf_ops.emb_lookup(self.entity_embeddings_imag, neg_tail_ids)

        neg_head_emb_real, neg_head_emb_imag = tf_ops.complex_dropout(
            neg_head_emb_real, neg_head_emb_imag, self.config["emb_dropout"])
        neg_tail_emb_real, neg_tail_emb_imag = tf_ops.complex_dropout(
            neg_tail_emb_real, neg_tail_emb_imag, self.config["emb_dropout"])

    with tf.name_scope('negative_scores') as scope:
        negative_scores_head = self.score_func(
            pos_rel_emb_real, pos_rel_emb_imag,
            neg_head_emb_real, neg_head_emb_imag,
            pos_tail_emb_real, pos_tail_emb_imag,
            'bnd', n_dim=n_dim, arg_right=1)
        neg_scores.append(negative_scores_head)

        negative_scores_tail = self.score_func(
            pos_rel_emb_real, pos_rel_emb_imag,
            pos_head_emb_real, pos_head_emb_imag,
            neg_tail_emb_real, neg_tail_emb_imag,
            'bnd', n_dim=n_dim, arg_right=2)
        neg_scores.append(negative_scores_tail)

        if self.config["sample_negative_relations"]:
            neg_rel_emb_real = tf_ops.emb_lookup(
                self.relation_embeddings_real, neg_rel_ids)
            neg_rel_emb_imag = tf_ops.emb_lookup(
                self.relation_embeddings_imag, neg_rel_ids)
            neg_rel_emb_real, neg_rel_emb_imag = tf_ops.complex_dropout(
                neg_rel_emb_real, neg_rel_emb_imag, self.config["emb_dropout"])

            negative_scores_rel = self.score_func(
                neg_rel_emb_real, neg_rel_emb_imag,
                pos_head_emb_real, pos_head_emb_imag,
                pos_tail_emb_real, pos_tail_emb_imag,
                'bnd', n_dim=n_r, arg_right=0)
            neg_scores.append(negative_scores_rel)

    if self.config["loss_type"] in ["softplus"]:
        # pos_scores = tf.concat(pos_scores, axis=-1)
        neg_scores = tf.concat(neg_scores, axis=-1)
        energies.append((positive_scores, neg_scores))
    else:
        for ns in neg_scores:
            energies.append((positive_scores, ns))

    return self._loss_out(energies)
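
# --- Illustration: the relation-first argument order above matches the
# ComplEx score Re(<r, h, conj(t)>). A NumPy sketch of its real-valued
# expansion (an assumption about what score_func computes in 'bd' mode):
import numpy as np

def complex_score_sketch(r_re, r_im, h_re, h_im, t_re, t_im):
    # Re(sum_d r_d * h_d * conj(t_d)), written in real/imaginary parts.
    return np.sum(r_re * h_re * t_re
                  + r_re * h_im * t_im
                  + r_im * h_re * t_im
                  - r_im * h_im * t_re, axis=-1)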
def loss(self, positive_triplets): """ Returns a loss variable which can be optimized. :param positive_triplets: A batch (2D Tensor) of positive triplets. """ energies = [] head_ids, rel_ids, tail_ids = tf_ops.split_triplet(positive_triplets) with tf.name_scope('negative_sampling') as scope: neg_head_ids, neg_rel_ids, neg_tail_ids = self._generate_negative_samples( positive_triplets, self.config["k_negative_samples"], self.config["sample_negative_relations"]) project_entities = tf.assign( self.entity_embeddings, tf_ops.project_to_unit_tensor(self.entity_embeddings)) project_relations = tf.assign( self.relation_embeddings, tf_ops.project_to_unit_tensor(self.relation_embeddings)) with tf.control_dependencies([project_entities, project_relations]): with tf.name_scope('positive_sample_embeddings') as scope: pos_head_emb = tf_ops.emb_lookup(self.entity_embeddings, head_ids, expand_dim=1) pos_rel_emb = tf_ops.emb_lookup(self.relation_embeddings, rel_ids, expand_dim=1) pos_tail_emb = tf_ops.emb_lookup(self.entity_embeddings, tail_ids, expand_dim=1) with tf.name_scope('negative_sample_embeddings') as scope: neg_head_emb = tf_ops.emb_lookup(self.entity_embeddings, neg_head_ids) neg_tail_emb = tf_ops.emb_lookup(self.entity_embeddings, neg_tail_ids) if self.config["sample_negative_relations"]: neg_rel_emb = tf_ops.emb_lookup(self.relation_embeddings, neg_rel_ids) with tf.name_scope('predictions') as scope: hr_t_emb = self.predict_tail(pos_head_emb, pos_rel_emb) hcr_t_emb = self.predict_tail(neg_head_emb, pos_rel_emb) if self.config["sample_negative_relations"]: hrc_t_emb = self.predict_tail(pos_head_emb, neg_rel_emb) with tf.name_scope('distances') as scope: dist_hr_t = self.distance_func(hr_t_emb, pos_tail_emb, axis=-1) dist_hcr_t = self.distance_func(hcr_t_emb, pos_tail_emb, axis=-1) dist_hr_tc = self.distance_func(hr_t_emb, neg_tail_emb, axis=-1) if self.config["sample_negative_relations"]: dist_hrc_t = self.distance_func(hrc_t_emb, pos_tail_emb, axis=-1) with tf.name_scope('scores') as scope: score_hr_t = self.score_func(dist_hr_t) score_hcr_t = self.score_func(dist_hcr_t) score_hr_tc = self.score_func(dist_hr_tc) if self.config["sample_negative_relations"]: score_hrc_t = self.score_func(dist_hrc_t) with tf.name_scope('energies') as scope: energies.append((score_hr_t, score_hcr_t)) energies.append((score_hr_t, score_hr_tc)) if self.config["sample_negative_relations"]: energies.append((score_hr_t, score_hrc_t)) if self.config["loss_type"] == "softmax" or self.config[ "loss_type"] == "softplus": neng = [] for pos, neg in energies: neng.append((-1 * pos, -1 * neg)) return self._loss_out(neng) else: return self._loss_out(energies)