Exemple #1
0
def propositionalize(knowledgeGraph=None):
    """Encode every triple of a knowledge graph as a pair of indicator rows.

    For each triple <s, p, o> at position ``row`` of ``knowledgeGraph.graph``,
    with E the number of entries in ``knowledgeGraph.entities``:

    * ``x[row, entities[s]]`` and ``x[row, entities[o] + E]`` are set to 1.0
      (subject in the first half of the row, object in the second half);
    * ``y[row, relations[p]]`` is set to 1.0.

    All other cells stay 0.0.

    Args:
        knowledgeGraph: object exposing ``graph`` (a sized iterable of
            indexable triples) and ``entities`` / ``relations`` (sized
            lookups from a triple element to a column index).

    Returns:
        A dict with key ``'x'``, a float32 array of shape
        (number of triples, 2 * E), and key ``'y'``, a float32 array of shape
        (number of triples, number of relations).

    Raises:
        ValueError: if ``knowledgeGraph`` is falsy (e.g. ``None``).
    """
    if not knowledgeGraph:
        raise ValueError('Expects knowledge graph.')

    indicator = progressIndicator('Translating Knowledge Graph')
    indicator.start()

    try:
        num_triples = len(knowledgeGraph.graph)
        entity_index = knowledgeGraph.entities
        relation_index = knowledgeGraph.relations
        object_offset = len(entity_index)  # object columns follow the subject columns

        # float32 rather than the float64 default to keep the arrays small
        x = np.zeros((num_triples, 2 * object_offset), dtype=np.float32)
        y = np.zeros((num_triples, len(relation_index)), dtype=np.float32)

        for row, triple in enumerate(knowledgeGraph.graph):
            x[row, entity_index[triple[0]]] = 1.0
            y[row, relation_index[triple[1]]] = 1.0
            x[row, entity_index[triple[2]] + object_offset] = 1.0

        data = {'x': x, 'y': y}
    finally:
        # always stop the indicator, even when a lookup above fails
        indicator.stop()

    return data
Exemple #2
0
def run_program(dataset, model, learner, nepoch):
    """Run ``learner`` over the training folds for ``nepoch`` epochs and print the error.

    In every epoch each fold of ``dataset.train`` is passed to
    ``learner.run(model, fold)`` and the returned errors are summed. Each fold
    and then ``dataset.train`` itself are reset via their ``__reset__``
    methods, and the epoch's summed error divided by ``dataset.train.size``
    is added to a running total. After the last epoch that total, divided by
    ``dataset.test.size``, is printed as "Mean Error".

    Args:
        dataset: object exposing ``train`` (iterable of folds, with ``size``
            and ``__reset__``) and ``test`` (with ``size``).
        model: passed through unchanged to ``learner.run``.
        learner: object whose ``run(model, fold)`` returns a numeric error.
        nepoch: number of epochs to run.

    Returns:
        None. The result is only printed.
    """
    pi = progressIndicator("[Train]")
    pi.start()

    avg_error = 0.0
    try:
        for _ in range(nepoch):
            error = 0.0
            for fold in dataset.train:
                error += learner.run(model, fold)
                fold.__reset__()

            dataset.train.__reset__()

            avg_error += error / dataset.train.size
    finally:
        # Stop the indicator even if training raises, so it is never left
        # running after a failure.
        pi.stop()

    # NOTE(review): the accumulated per-epoch training error is normalised by
    # dataset.test.size, not by nepoch -- confirm this is intended.
    print(" ".ljust(len(str(nepoch))+27) + "Mean Error: {:.5f}".format(avg_error/dataset.test.size))
Exemple #3
0
    def read(self, local_path=None, remote_path=None, format=None):
        """Parse a knowledge graph from a path into ``self.graph``.

        ``local_path`` takes precedence; ``remote_path`` is used only when
        ``local_path`` is empty or ``None``. The chosen path is stored on
        ``self.path``. When ``format`` is not given it is guessed from the
        path with ``rdflib.util.guess_format``. After parsing,
        ``self.update()`` is called.

        Args:
            local_path: preferred source to parse.
            remote_path: fallback source used when ``local_path`` is unset.
            format: serialization format passed to ``self.graph.parse``;
                guessed from the path when falsy.

        Raises:
            ValueError: if neither ``local_path`` nor ``remote_path`` provides
                a usable (non-empty) path.
        """
        pi = progressIndicator('Reading Knowledge Graph')
        pi.start()

        try:
            # Validate the path that will actually be used: checking only for
            # None would let empty strings through to the parser.
            path = local_path or remote_path
            if not path:
                raise ValueError('Path cannot be left undefined')

            self.path = path

            if not format:
                format = rdflib.util.guess_format(self.path)

            self.graph.parse(self.path, format=format)

            self.update()
        finally:
            # always stop the indicator, including on validation or parse errors
            pi.stop()