def train(self):
    """Fit the model on the dataset's triples and return the trainer.

    Derives the ``(N, N, M)`` size tuple from the number of entities and
    relations in ``self.dataset``, creates the test and validation
    evaluators for ``'rank'`` or ``'lp'`` mode, instantiates the sampler
    selected by ``self.sampler``, then fits the trainer produced by
    ``self.setup_trainer`` and invokes ``self.callback``.

    Returns:
        The trainer, after ``fit`` and the callback have run.

    Raises:
        ValueError: If ``self.sampler`` is not ``'corrupted'``,
            ``'random-mode'`` or ``'lcwa'``.
    """
    # Size tuple handed to the samplers and the trainer.
    n_entities = len(self.dataset.entities)
    n_relations = len(self.dataset.relations)
    shape = (n_entities, n_entities, n_relations)

    # Pull the three splits apart once; their concatenation is the set of
    # triples given to the ranking evaluators alongside the evaluated split.
    splits = self.dataset.train_split()
    train_part = splits['train_subs']
    test_part = splits['test_subs']
    valid_part = splits['valid_subs']
    all_triples = train_part + test_part + valid_part

    # Optionally fit on every split instead of the training split only.
    fit_triples = all_triples if self.train_all else train_part
    fit_labels = np.ones(len(fit_triples))

    mode = self.mode
    if mode == 'rank':
        self.ev_test = self.evaluator(test_part, all_triples, self.neval)
        self.ev_valid = self.evaluator(valid_part, all_triples, self.neval)
    elif mode == 'lp':
        # All-ones arrays are passed as the labels of the test and
        # validation triples.
        self.ev_test = self.evaluator(test_part, np.ones(len(test_part)))
        self.ev_valid = self.evaluator(valid_part, np.ones(len(valid_part)))

    kind = self.sampler
    if kind == 'corrupted':
        # NOTE(review): `ti` is never assigned inside this method; unless it
        # exists in an enclosing scope this branch raises NameError - confirm.
        sampler = sample.CorruptedSampler(self.ne, fit_triples, ti)
    elif kind == 'random-mode':
        sampler = sample.RandomModeSampler(self.ne, [0, 1], fit_triples, shape)
    elif kind == 'lcwa':
        sampler = sample.LCWASampler(self.ne, [0, 1, 2], fit_triples, shape)
    else:
        raise ValueError('Unknown sampler (%s)' % self.sampler)

    trainer = self.setup_trainer(shape, sampler)
    model_name = trainer.model.__class__.__name__
    trainer_name = trainer.__class__.__name__
    print("Fitting model %s with trainer %s" % (model_name, trainer_name))

    trainer.fit(fit_triples, fit_labels)
    self.callback(trainer, with_eval=True)
    return trainer
def train(self):
    """Load the data, build evaluators and a sampler, then fit the trainer.

    The data is obtained from ``self.read_data()`` and indexed by the keys
    ``'entities'``, ``'relations'``, ``'train_subs'``, ``'test_subs'`` and
    ``'valid_subs'`` (plus ``'test_labels'`` and ``'valid_labels'`` in
    ``'lp'`` mode). Only the training triples are fitted, each labelled 1.

    Raises:
        ValueError: If ``self.args.sampler`` is not ``'corrupted'``,
            ``'random-mode'`` or ``'lcwa'``.
    """
    dataset = self.read_data()
    n_entities = len(dataset['entities'])
    n_relations = len(dataset['relations'])
    shape = (n_entities, n_entities, n_relations)

    train_part = dataset['train_subs']
    test_part = dataset['test_subs']
    valid_part = dataset['valid_subs']
    # Concatenation of all three splits, passed to the ranking evaluators.
    known_triples = train_part + test_part + valid_part

    options = self.args
    if options.mode == 'rank':
        self.ev_test = self.evaluator(test_part, known_triples, self.neval)
        self.ev_valid = self.evaluator(valid_part, known_triples, self.neval)
    elif options.mode == 'lp':
        self.ev_test = self.evaluator(test_part, dataset['test_labels'])
        self.ev_valid = self.evaluator(valid_part, dataset['valid_labels'])

    # Fit on the training split only; every triple gets label 1.
    fit_triples = train_part
    fit_labels = np.ones(len(fit_triples))

    kind = options.sampler
    if kind == 'corrupted':
        # NOTE(review): `ti` is never assigned inside this method; unless it
        # exists in an enclosing scope this branch raises NameError - confirm.
        sampler = sample.CorruptedSampler(options.ne, fit_triples, ti)
    elif kind == 'random-mode':
        sampler = sample.RandomModeSampler(
            options.ne, [0, 1], fit_triples, shape)
    elif kind == 'lcwa':
        sampler = sample.LCWASampler(
            options.ne, [0, 1, 2], fit_triples, shape)
    else:
        raise ValueError('Unknown sampler (%s)' % self.args.sampler)

    trainer = self.setup_trainer(shape, sampler)
    details = (
        trainer.model.__class__.__name__,
        trainer.__class__.__name__,
        self.args,
    )
    log.info("Fitting model %s with trainer %s and parameters %s" % details)

    trainer.fit(fit_triples, fit_labels)
    self.callback(trainer, with_eval=True, with_test=True)
def train(self):
    """Train on the triple files stored under ``../data/pb/``.

    Reads ``train.txt``, ``valid.txt`` and ``test.txt`` (paths relative to
    the current working directory), builds the ranking evaluators when
    ``self.args.mode`` is ``'rank'``, instantiates the sampler selected by
    ``self.args.sampler`` and fits the trainer returned by
    ``self.setup_trainer`` on the training triples, each labelled 1.

    Raises:
        ValueError: If ``self.args.sampler`` is not ``'corrupted'``,
            ``'random-mode'`` or ``'lcwa'``, or if a non-blank line holds a
            field that is not an integer.
        IndexError: If a non-blank line has fewer than three
            tab-separated fields.
        OSError: If one of the three files cannot be opened.
    """

    def read_triples(path):
        """Return the ``(h, r, t)`` integer triples stored in *path*.

        Each non-blank line must start with three tab-separated integers;
        any further columns are ignored.
        """
        triples = []
        with open(path) as handle:
            for raw in handle:
                if not raw.strip():
                    # Blank lines (typically a trailing empty line at the
                    # end of the file) carry no triple; skip them instead
                    # of failing on int('').
                    continue
                fields = raw.rstrip("\n").split('\t')
                triples.append(
                    (int(fields[0]), int(fields[1]), int(fields[2])))
        return triples

    train_subs = read_triples("../data/pb/train.txt")
    valid_subs = read_triples("../data/pb/valid.txt")
    test_subs = read_triples("../data/pb/test.txt")

    # NOTE(review): the sizes are hard-coded rather than derived from the
    # data - confirm they match the files read above.
    N = 15000
    M = 15000
    sz = (N, N, M)

    # Concatenation of all three splits, passed to the ranking evaluators.
    true_triples = train_subs + test_subs + valid_subs

    # Only 'rank' mode sets up evaluators in this variant.
    if self.args.mode == 'rank':
        self.ev_test = self.evaluator(test_subs, true_triples, self.neval)
        self.ev_valid = self.evaluator(valid_subs, true_triples, self.neval)

    xs = train_subs
    ys = np.ones(len(xs))

    # create sampling objects
    if self.args.sampler == 'corrupted':
        # NOTE(review): `ti` is never assigned inside this method; unless it
        # exists in an enclosing scope this branch raises NameError - confirm.
        sampler = sample.CorruptedSampler(self.args.ne, xs, ti)
    elif self.args.sampler == 'random-mode':
        sampler = sample.RandomModeSampler(self.args.ne, [0, 1], xs, sz)
    elif self.args.sampler == 'lcwa':
        sampler = sample.LCWASampler(self.args.ne, [0, 1, 2], xs, sz)
    else:
        raise ValueError('Unknown sampler (%s)' % self.args.sampler)

    trn = self.setup_trainer(sz, sampler)
    log.info(
        "Fitting model %s with trainer %s and parameters %s" %
        (trn.model.__class__.__name__, trn.__class__.__name__, self.args))
    trn.fit(xs, ys)
    self.callback(trn, with_eval=True)
def train(self):
    """Load the pickled data, optionally enrich it, and fit the trainer.

    Reads the pickle at ``self.args.fin``, stores its ``'entities'`` and
    ``'relations'`` entries on ``self.dicte`` / ``self.dictr``, and creates
    ``self.ev`` when it is ``None``. When ``self.evActive`` is true the
    training triples are extended with the output of
    ``self.ev.enrichTrainingWithExVars``. Evaluators are built for
    ``'rank'`` or ``'lp'`` mode, the sampler named by ``self.args.sampler``
    is instantiated, and the trainer from ``self.setup_trainer`` is fitted
    with every triple labelled 1.

    Raises:
        ValueError: If ``self.args.sampler`` is not ``'corrupted'``,
            ``'random-mode'``, ``'subgraph'`` or ``'lcwa'``.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only point self.args.fin at data from a trusted source.
    with open(self.args.fin, 'rb') as fin:
        data = pickle.load(fin)
    self.logger.setInput(data)
    self.dicte = data['entities']
    self.dictr = data['relations']

    N = len(data['entities'])
    M = len(data['relations'])
    sz = (N, N, M)
    xs_orig = data['train_subs']

    # Create the existential-variables helper on first use. Identity test on
    # purpose: `== None` dispatches to a user-defined __eq__ and could
    # wrongly replace an existing object.
    if self.ev is None:
        self.ev = extvars.ExtVars()

    # Enrich the training triples with existential variables from the graph.
    if self.evActive:
        self.ev.load(xs_orig, data['r2e'], self.dicte, self.dictr,
                     self.db, self.minSize)
        xs = xs_orig + self.ev.enrichTrainingWithExVars(xs_orig)
    else:
        xs = xs_orig
    ys = np.ones(len(xs))

    # Concatenation of all three splits, passed to the ranking evaluators.
    true_triples = (data['train_subs'] + data['test_subs'] +
                    data['valid_subs'])

    if self.args.mode == 'rank':
        # The test evaluator is given 1.0 where the validation evaluator is
        # given self.args.valid_sample.
        self.ev_test = self.evaluator(
            self.log, data['test_subs'], true_triples, N,
            self.ev.getNExtVars(), self.ev, self.dictr, self.dicte,
            None, 1.0, self.neval)
        self.ev_valid = self.evaluator(
            self.log, data['valid_subs'], true_triples, N,
            self.ev.getNExtVars(), self.ev, self.dictr, self.dicte,
            None, self.args.valid_sample, self.neval)
    elif self.args.mode == 'lp':
        self.ev_test = self.evaluator(data['test_subs'],
                                      data['test_labels'])
        self.ev_valid = self.evaluator(data['valid_subs'],
                                       data['valid_labels'])

    # create sampling objects
    if self.args.sampler == 'corrupted':
        # NOTE(review): `ti` is never assigned inside this method; unless it
        # exists in an enclosing scope this branch raises NameError - confirm.
        sampler = sample.CorruptedSampler(self.args.ne, xs, ti)
    elif self.args.sampler == 'random-mode':
        sampler = sample.RandomModeSampler(self.args.ne, [0, 1], xs, sz)
    elif self.args.sampler == 'subgraph':
        sampler = sample.SubGraphsSampler(
            self.dicte, self.args.ne, N, self.ev.getNExtVars(), [0, 1],
            xs, sz, self.ev, self.db, data["r2e"])
    elif self.args.sampler == 'lcwa':
        sampler = sample.LCWASampler(self.args.ne, [0, 1, 2], xs, sz)
    else:
        raise ValueError('Unknown sampler (%s)' % self.args.sampler)

    trn = self.setup_trainer(sz, sampler, self.ev, self.existing_model)
    self.log.info(
        "Fitting model %s with trainer %s and parameters %s" %
        (trn.model.__class__.__name__, trn.__class__.__name__, self.args))
    trn.fit(xs, ys)
    self.callback(trn, with_eval=True)