def _get_indexed_samples(self, indices):
    """
    Build one ranking instance per requested question index.

    For every index, a copy of the stored question graph (with its 'tokens'
    entry removed) serves as the gold graph and is combined with negative
    graphs from ``_get_negative_instances_for_sentence``. One negative is
    always placed first; the gold graph and the remaining negatives follow
    in random order.

    :param indices: iterable of indices into ``self._questions_data``.
    :return: tuple ``(graph_lists, targets)``. ``graph_lists`` is a list of
        ``(question_tokens, graphs)`` pairs; ``targets`` is a numpy array
        built from one score list per index (the gold score for the gold
        graph, 0.0 for every negative and for padding).
    """
    # Configuration values do not change between iterations: read them once.
    max_samples = self._p.get("max.negative.samples", 30)
    gold_score = 1.0 * self._p.get("mult.f1.by", 1.0)

    graph_lists = []
    targets = []
    for index in indices:
        sentence = graph.copy_graph(self._questions_data[index])  # type: Dict
        question_tokens = self.get_question_tokens(index)
        # The tokens are returned separately, so drop them from the graph copy.
        del sentence['tokens']
        graph_list = [(sentence, gold_score)]

        negative_pool_size = max_samples - len(graph_list)
        negative_pool = [
            (g, 0.0)
            for g in self._get_negative_instances_for_sentence(
                negative_pool_size, graph_list)
        ]

        if negative_pool:
            # Pin one negative to the first position and shuffle the gold
            # graph together with the remaining negatives.
            one_negative = negative_pool[-1]
            instance = graph_list + negative_pool[:-1]
            np.random.shuffle(instance)
            instance = [one_negative] + instance
        else:
            # "max.negative.samples" <= 1 leaves no room for negatives;
            # previously this crashed with IndexError on negative_pool[-1].
            instance = graph_list

        # Pad the scores with zeros up to the configured instance size.
        target = [score for _, score in instance]
        target += [0.0] * (max_samples - len(instance))

        graph_lists.append((question_tokens, [g for g, _ in instance]))
        targets.append(target)
    return graph_lists, np.asarray(targets)
def _get_negative_instances_for_sentence(self, pool_size, graph_list):
    """
    Create negative graphs by corrupting copies of the first graph in the list.

    Each negative is a copy of ``graph_list[0][0]`` in which every edge gets a
    'kbID' sampled at random from ``self._idx2property`` (with "v" appended)
    and loses its 'label' entry, if it has one. No check is made that the
    sampled property differs from the original one.

    :param pool_size: number of negative graphs to generate; values <= 0
        produce an empty list.
    :param graph_list: list of ``(graph, score)`` pairs; only the graph of the
        first pair is used.
    :return: list of ``pool_size`` corrupted graph copies.
    """
    gold_graph = graph_list[0][0]
    corrupted_graphs = []
    for _ in range(pool_size):
        corrupted = graph.copy_graph(gold_graph)
        for relation in corrupted['edgeSet']:
            random_idx = np.random.randint(len(self._idx2property))
            relation['kbID'] = self._idx2property[random_idx] + "v"
            # A corrupted edge must not keep the original label.
            relation.pop("label", None)
        corrupted_graphs.append(corrupted)
    return corrupted_graphs