Ejemplo n.º 1
0
    def _get_indexed_samples(self, indices):
        graph_lists = []
        targets = []
        for index in indices:
            sentence = graph.copy_graph(
                self._questions_data[index])  # type: Dict
            question_tokens = self.get_question_tokens(index)
            del sentence['tokens']
            graph_list = [(sentence, 1.0 * self._p.get("mult.f1.by", 1.0))]
            negative_pool_size = self._p.get("max.negative.samples",
                                             30) - len(graph_list)
            negative_pool = self._get_negative_instances_for_sentence(
                negative_pool_size, graph_list)

            negative_pool = [(g, 0.0) for g in negative_pool]
            one_negative = negative_pool[-1]
            instance = graph_list + negative_pool[:-1]
            np.random.shuffle(instance)
            instance = [one_negative] + instance

            target = [g[1] for g in instance] + [0.0] * (
                self._p.get("max.negative.samples", 30) - len(instance))
            instance = [el[0] for el in instance]

            graph_lists.append((question_tokens, instance))
            targets.append(target)
        return graph_lists, np.asarray(targets)
Ejemplo n.º 2
0
 def _get_negative_instances_for_sentence(self, pool_size, graph_list):
     sentence = graph_list[0][0]
     negative_pool = []
     for i in range(pool_size):
         neg_graph = graph.copy_graph(sentence)
         for edge in neg_graph['edgeSet']:
             edge['kbID'] = self._idx2property[np.random.randint(
                 len(self._idx2property))] + "v"
             if "label" in edge:
                 del edge["label"]
         negative_pool.append(neg_graph)
     return negative_pool