Beispiel #1
0
    def train(self):
        """Fit the model on the dataset's triples and return the trainer.

        The method sets ``self.ev_test`` / ``self.ev_valid`` for the
        ``'rank'`` and ``'lp'`` modes, builds the sampling object selected
        by ``self.sampler``, fits the trainer returned by
        ``self.setup_trainer`` and finally invokes ``self.callback`` with
        evaluation enabled.

        Returns:
            The trainer object after ``fit`` has been called on it.

        Raises:
            ValueError: If ``self.sampler`` is not ``'corrupted'``,
                ``'random-mode'`` or ``'lcwa'``.
        """
        # Size triple handed to the samplers and to setup_trainer
        n_entities = len(self.dataset.entities)
        n_relations = len(self.dataset.relations)
        shape = (n_entities, n_entities, n_relations)

        # Pull the three splits out of the dataset
        splits = self.dataset.train_split()
        train_part = splits['train_subs']
        test_part = splits['test_subs']
        valid_part = splits['valid_subs']
        known_triples = train_part + test_part + valid_part

        # Optionally train on every known triple instead of the train split
        inputs = known_triples if self.train_all else train_part
        targets = np.ones(len(inputs))

        # Evaluators for the held-out splits
        mode = self.mode
        if mode == 'rank':
            self.ev_test = self.evaluator(
                test_part, known_triples, self.neval)
            self.ev_valid = self.evaluator(
                valid_part, known_triples, self.neval)
        elif mode == 'lp':
            # No label arrays are read from the splits here; every
            # held-out triple is given the label 1.
            self.ev_test = self.evaluator(
                test_part, np.ones(len(test_part)))
            self.ev_valid = self.evaluator(
                valid_part, np.ones(len(valid_part)))

        # Sampling object
        kind = self.sampler
        if kind == 'corrupted':
            # NOTE(review): `ti` is never assigned inside this method; it
            # has to be provided by an enclosing scope -- confirm.
            sampler = sample.CorruptedSampler(self.ne, inputs, ti)
        elif kind == 'random-mode':
            sampler = sample.RandomModeSampler(
                self.ne, [0, 1], inputs, shape)
        elif kind == 'lcwa':
            sampler = sample.LCWASampler(
                self.ne, [0, 1, 2], inputs, shape)
        else:
            raise ValueError('Unknown sampler (%s)' % kind)

        # Build the trainer, announce it, then fit
        trainer = self.setup_trainer(shape, sampler)
        model_name = trainer.model.__class__.__name__
        trainer_name = trainer.__class__.__name__
        print("Fitting model %s with trainer %s" %
              (model_name, trainer_name))
        trainer.fit(inputs, targets)
        self.callback(trainer, with_eval=True)

        return trainer
Beispiel #2
0
    def train(self):
        """Load the data via ``self.read_data`` and fit a trainer on it.

        Sets ``self.ev_test`` / ``self.ev_valid`` for the ``'rank'`` and
        ``'lp'`` modes, builds the sampling object named by
        ``self.args.sampler``, fits the trainer returned by
        ``self.setup_trainer`` on the training split and then invokes
        ``self.callback`` with both evaluation and test flags enabled.

        Raises:
            ValueError: If ``self.args.sampler`` is not ``'corrupted'``,
                ``'random-mode'`` or ``'lcwa'``.
        """
        dataset = self.read_data()
        opts = self.args

        # Size triple handed to the samplers and to setup_trainer
        n_entities = len(dataset['entities'])
        n_relations = len(dataset['relations'])
        shape = (n_entities, n_entities, n_relations)

        train_part = dataset['train_subs']
        test_part = dataset['test_subs']
        valid_part = dataset['valid_subs']
        # Union of all splits, passed to the ranking evaluators
        known_triples = train_part + test_part + valid_part

        if opts.mode == 'rank':
            self.ev_test = self.evaluator(
                test_part, known_triples, self.neval)
            self.ev_valid = self.evaluator(
                valid_part, known_triples, self.neval)
        elif opts.mode == 'lp':
            self.ev_test = self.evaluator(
                test_part, dataset['test_labels'])
            self.ev_valid = self.evaluator(
                valid_part, dataset['valid_labels'])

        # Only the training split is fitted; every triple gets target 1
        targets = np.ones(len(train_part))

        kind = opts.sampler
        if kind == 'corrupted':
            # NOTE(review): `ti` is never assigned inside this method; it
            # has to be provided by an enclosing scope -- confirm.
            sampler = sample.CorruptedSampler(opts.ne, train_part, ti)
        elif kind == 'random-mode':
            sampler = sample.RandomModeSampler(
                opts.ne, [0, 1], train_part, shape)
        elif kind == 'lcwa':
            sampler = sample.LCWASampler(
                opts.ne, [0, 1, 2], train_part, shape)
        else:
            raise ValueError('Unknown sampler (%s)' % kind)

        trainer = self.setup_trainer(shape, sampler)

        model_name = trainer.model.__class__.__name__
        trainer_name = trainer.__class__.__name__
        log.info(
            "Fitting model %s with trainer %s and parameters %s" %
            (model_name, trainer_name, self.args))
        trainer.fit(train_part, targets)
        self.callback(trainer, with_eval=True, with_test=True)
Beispiel #3
0
    def train(self):
        """Train on the triple files stored under ``../data/pb``.

        Reads ``train.txt``, ``valid.txt`` and ``test.txt`` (one triple per
        line, three tab-separated integers), sets up the ranking evaluators
        when ``self.args.mode == 'rank'``, builds the sampling object named
        by ``self.args.sampler``, fits the trainer returned by
        ``self.setup_trainer`` on the training triples and then invokes
        ``self.callback`` with evaluation enabled.

        Raises:
            ValueError: If ``self.args.sampler`` is not ``'corrupted'``,
                ``'random-mode'`` or ``'lcwa'``; also propagated from
                ``int()`` when a column is not an integer.
            IndexError: If a non-blank line has fewer than three columns.
            OSError: If one of the data files cannot be opened.
        """

        def read_triples(path):
            """Parse one tab-separated file into a list of 3-int tuples."""
            triples = []
            with open(path) as f:
                for line in f:
                    if not line.strip():
                        # Blank lines carry no triple; skip them instead
                        # of failing on int('').
                        continue
                    cols = line.rstrip("\n").split('\t')
                    # Columns are kept in file order; extras are ignored.
                    triples.append(
                        (int(cols[0]), int(cols[1]), int(cols[2])))
            return triples

        train_subs = read_triples("../data/pb/train.txt")
        valid_subs = read_triples("../data/pb/valid.txt")
        test_subs = read_triples("../data/pb/test.txt")

        # NOTE(review): sizes are hard-coded rather than derived from the
        # data -- confirm 15000 covers every id that occurs in the files.
        N = 15000
        M = 15000
        sz = (N, N, M)

        true_triples = train_subs + test_subs + valid_subs
        # Only the 'rank' mode gets evaluators in this variant.
        if self.args.mode == 'rank':
            self.ev_test = self.evaluator(test_subs, true_triples, self.neval)
            self.ev_valid = self.evaluator(valid_subs, true_triples,
                                           self.neval)

        xs = train_subs
        ys = np.ones(len(xs))

        # create sampling objects
        if self.args.sampler == 'corrupted':
            # NOTE(review): `ti` is never assigned inside this method; it
            # has to be provided by an enclosing scope -- confirm.
            sampler = sample.CorruptedSampler(self.args.ne, xs, ti)
        elif self.args.sampler == 'random-mode':
            sampler = sample.RandomModeSampler(self.args.ne, [0, 1], xs, sz)
        elif self.args.sampler == 'lcwa':
            sampler = sample.LCWASampler(self.args.ne, [0, 1, 2], xs, sz)
        else:
            raise ValueError('Unknown sampler (%s)' % self.args.sampler)

        trn = self.setup_trainer(sz, sampler)
        log.info(
            "Fitting model %s with trainer %s and parameters %s" %
            (trn.model.__class__.__name__, trn.__class__.__name__, self.args))
        trn.fit(xs, ys)
        self.callback(trn, with_eval=True)
Beispiel #4
0
    def train(self):
        """Load the pickled dataset at ``self.args.fin`` and fit a trainer.

        Steps, in order:

        1. Unpickle the data and hand it to ``self.logger.setInput``.
        2. Store the entity/relation dictionaries on ``self.dicte`` /
           ``self.dictr``.
        3. Make sure ``self.ev`` exists; when ``self.evActive`` is set,
           load it and append the triples it derives to the training set.
        4. Build ``self.ev_test`` / ``self.ev_valid`` for the ``'rank'``
           and ``'lp'`` modes.
        5. Build the sampling object named by ``self.args.sampler``.
        6. Fit the trainer returned by ``self.setup_trainer`` and invoke
           ``self.callback`` with evaluation enabled.

        Raises:
            ValueError: If ``self.args.sampler`` is not ``'corrupted'``,
                ``'random-mode'``, ``'subgraph'`` or ``'lcwa'``.
        """
        # read data
        # NOTE: pickle.load executes arbitrary code from the file; only
        # point `self.args.fin` at trusted input.
        with open(self.args.fin, 'rb') as fin:
            data = pickle.load(fin)
        self.logger.setInput(data)

        self.dicte = data['entities']
        self.dictr = data['relations']
        N = len(data['entities'])
        M = len(data['relations'])
        sz = (N, N, M)
        xs_orig = data['train_subs']

        # Calculate new existential variables
        # Enrich the training with existential variables from the graph
        # Identity check: `== None` would defer to a custom __eq__ on the
        # stored object and could replace a perfectly valid instance.
        if self.ev is None:
            self.ev = extvars.ExtVars()
        if self.evActive:
            self.ev.load(xs_orig, data['r2e'], self.dicte, self.dictr, self.db,
                         self.minSize)
            xs = xs_orig + self.ev.enrichTrainingWithExVars(xs_orig)
        else:
            xs = xs_orig
        ys = np.ones(len(xs))

        true_triples = data['train_subs'] + data['test_subs'] + data[
            'valid_subs']
        if self.args.mode == 'rank':
            # The test evaluator is given 1.0 where the validation
            # evaluator is given `self.args.valid_sample`.
            self.ev_test = self.evaluator(self.log,
                                          data['test_subs'], true_triples, N,
                                          self.ev.getNExtVars(), self.ev,
                                          self.dictr, self.dicte, None, 1.0,
                                          self.neval)
            self.ev_valid = self.evaluator(self.log, data['valid_subs'],
                                           true_triples, N,
                                           self.ev.getNExtVars(), self.ev,
                                           self.dictr, self.dicte, None,
                                           self.args.valid_sample, self.neval)
        elif self.args.mode == 'lp':
            self.ev_test = self.evaluator(data['test_subs'],
                                          data['test_labels'])
            self.ev_valid = self.evaluator(data['valid_subs'],
                                           data['valid_labels'])

        # create sampling objects
        if self.args.sampler == 'corrupted':
            # NOTE(review): `ti` is never assigned inside this method; it
            # has to be provided by an enclosing scope -- confirm.
            sampler = sample.CorruptedSampler(self.args.ne, xs, ti)
        elif self.args.sampler == 'random-mode':
            sampler = sample.RandomModeSampler(self.args.ne, [0, 1], xs, sz)
        elif self.args.sampler == 'subgraph':
            sampler = sample.SubGraphsSampler(self.dicte, self.args.ne, N,
                                              self.ev.getNExtVars(), [0, 1],
                                              xs, sz, self.ev, self.db,
                                              data["r2e"])
        elif self.args.sampler == 'lcwa':
            sampler = sample.LCWASampler(self.args.ne, [0, 1, 2], xs, sz)
        else:
            raise ValueError('Unknown sampler (%s)' % self.args.sampler)

        trn = self.setup_trainer(sz, sampler, self.ev, self.existing_model)
        self.log.info(
            "Fitting model %s with trainer %s and parameters %s" %
            (trn.model.__class__.__name__, trn.__class__.__name__, self.args))
        trn.fit(xs, ys)
        self.callback(trn, with_eval=True)