Beispiel #1
0
    def learn_model(self,
                    X,
                    types=None,
                    type_hierarchy=None,
                    domains=None,
                    ranges=None):
        """Train the configured embedding model on the relation slices in X.

        X is indexed and iterated as a sequence of per-relation slices; each
        slice must expose ``shape``, ``row`` and ``col`` (presumably scipy COO
        matrices -- TODO confirm against the caller).  Every stored entry of
        slice ``r`` becomes one positive triple ``(row, col, r)``.

        The fitted trainer is stored on ``self.trainer``; ``self.X`` and
        ``self.sz`` are set as a side effect.  ``types``, ``type_hierarchy``,
        ``domains`` and ``ranges`` are accepted but not used by this method.

        Raises:
            ValueError: if ``self.model`` or ``self.sample_mode`` is not one
                of the supported names.
        """
        self.X = X
        N = self.X[0].shape[1]
        self.sz = (N, N, len(self.X))

        if self.model == "hole":
            embedding_model = HolE(self.sz,
                                   self.n_dim,
                                   rparam=self.rparam,
                                   af=afs[self.activation_function],
                                   init=self.init)
        elif self.model == "transe":
            embedding_model = TransE(self.sz, self.n_dim, init=self.init)
        elif self.model == "rescal":
            embedding_model = RESCAL(self.sz, self.n_dim, init=self.init)
        else:
            # Fail here instead of with an unbound local further down.
            raise ValueError("unknown model: %r" % (self.model,))

        xs = []
        for r, rel_slice in enumerate(self.X):
            heads = rel_slice.row
            tails = rel_slice.col
            # extend() accepts the zip iterator on Python 3 (list + zip is a
            # TypeError there) and avoids re-copying xs for every relation.
            xs.extend(zip(heads, tails, [r] * len(heads)))

        ys = np.ones(len(xs))
        if self.sample_mode == 'corrupted':
            ti = type_index(xs)
            sampler = CorruptedSampler(self.negative_examples, xs, ti)
        elif self.sample_mode == 'random':
            sampler = RandomModeSampler(self.negative_examples, [0, 1], xs,
                                        self.sz)
        elif self.sample_mode == 'lcwa':
            sampler = LCWASampler(self.negative_examples, [0, 1, 2], xs,
                                  self.sz)
        else:
            raise ValueError("unknown sample_mode: %r" % (self.sample_mode,))

        self.trainer = PairwiseStochasticTrainer(
            embedding_model,
            nbatches=self.n_batches,
            max_epochs=self.max_epochs,
            post_epoch=[callback],
            learning_rate=self.learning_rate,
            margin=self.margin,
            samplef=sampler.sample)
        self.trainer.fit(xs, ys)
 def setup_trainer(self, sz, sampler, ev, existing_model=None):
     """Build a HolE model and wrap it in the trainer chosen by the CLI args.

     The model is configured from ``self.args`` (ncomp, rparam, afs, inite,
     initr) plus the ``ev`` and ``existing_model`` arguments, which are
     forwarded as ``ev=`` and ``model=``.  When ``self.args.no_pairwise`` is
     truthy a StochasticTrainer is returned, otherwise a
     PairwiseStochasticTrainer that additionally receives ``args.margin``.
     """
     cfg = self.args
     hole = HolE(sz,
                 cfg.ncomp,
                 rparam=cfg.rparam,
                 af=afs[cfg.afs],
                 inite=cfg.inite,
                 initr=cfg.initr,
                 ev=ev,
                 model=existing_model)

     # Keyword arguments common to both trainer flavours.
     shared = dict(nbatches=cfg.nb,
                   max_epochs=cfg.me,
                   learning_rate=cfg.lr,
                   samplef=sampler.sample,
                   post_epoch=[self.callback])

     if cfg.no_pairwise:
         return StochasticTrainer(hole, **shared)
     return PairwiseStochasticTrainer(hole, margin=cfg.margin, **shared)
Beispiel #3
0
    def setup_trainer(self, sz, sampler):
        """Build a TransE model and its pairwise stochastic trainer.

        ``l1`` is enabled on the model exactly when ``self.args.norm`` equals
        ``'l1'``.  After the model is constructed, ``sz`` and
        ``init_nunif.counter`` are logged at INFO level.  The trainer also
        receives ``args.fgrad`` / ``args.fembed`` as ``file_grad`` /
        ``file_embed``.
        """
        cfg = self.args
        transe = TransE(sz, cfg.ncomp, l1=(cfg.norm == 'l1'), init=cfg.init)

        # Logged after construction so the counter reflects model creation.
        message = " sz = %s  and init_nunif = %d" % (sz, init_nunif.counter)
        log.info(message)

        trainer_options = dict(nbatches=cfg.nb,
                               margin=cfg.margin,
                               max_epochs=cfg.me,
                               learning_rate=cfg.lr,
                               samplef=sampler.sample,
                               post_epoch=[self.callback],
                               file_grad=cfg.fgrad,
                               file_embed=cfg.fembed)
        return PairwiseStochasticTrainer(transe, **trainer_options)
Beispiel #4
0
 def setup_trainer(self, sz, sampler):
     """Return a PairwiseStochasticTrainer around a freshly built TransE.

     Model size comes from ``sz``; the embedding dimension, initialiser and
     all trainer settings are read from ``self.args``.
     """
     cfg = self.args
     transe = TransE(sz, cfg.ncomp, init=cfg.init)
     trainer_options = dict(nbatches=cfg.nb,
                            margin=cfg.margin,
                            max_epochs=cfg.me,
                            learning_rate=cfg.lr,
                            samplef=sampler.sample,
                            post_epoch=[self.callback])
     return PairwiseStochasticTrainer(transe, **trainer_options)
Beispiel #5
0
 def setup_trainer(self, sz, sampler):
     """Build a TransE model (optionally L1) and its pairwise trainer.

     ``l1`` is enabled exactly when ``self.args.norm`` equals ``'l1'``.
     ``sz`` and ``init_nunif.counter`` are logged at INFO level once the
     model has been constructed.
     """
     cfg = self.args
     transe = TransE(sz, cfg.ncomp, l1=(cfg.norm == 'l1'), init=cfg.init)

     # Logged after construction so the counter reflects model creation.
     message = " sz = %s  and init_nunif = %d" % (sz, init_nunif.counter)
     log.info(message)

     trainer_options = dict(nbatches=cfg.nb,
                            margin=cfg.margin,
                            max_epochs=cfg.me,
                            learning_rate=cfg.lr,
                            samplef=sampler.sample,
                            post_epoch=[self.callback])
     return PairwiseStochasticTrainer(transe, **trainer_options)
Beispiel #6
0
 def setup_trainer(self, sz, sampler):
     """Build a HolE model and the trainer selected by ``args.no_pairwise``.

     A truthy ``self.args.no_pairwise`` yields a StochasticTrainer; otherwise
     a PairwiseStochasticTrainer is returned, which additionally receives
     ``args.margin``.
     """
     cfg = self.args
     hole = HolE(sz,
                 cfg.ncomp,
                 rparam=cfg.rparam,
                 af=afs[cfg.afs],
                 init=cfg.init)

     # Keyword arguments common to both trainer flavours.
     shared = dict(nbatches=cfg.nb,
                   max_epochs=cfg.me,
                   post_epoch=[self.callback],
                   learning_rate=cfg.lr,
                   samplef=sampler.sample)

     if cfg.no_pairwise:
         return StochasticTrainer(hole, **shared)
     return PairwiseStochasticTrainer(hole, margin=cfg.margin, **shared)
Beispiel #7
0
    def fit(self):
        """Train a CESI model on the side-info triples and cache embeddings.

        Positive examples are ``self.side_info.trpIds``, each labelled 1.
        Entity and relation embeddings start from word vectors loaded from
        ``self.p.embed_loc`` when ``self.p.embed_init == 'glove'``, and from
        ``np.random.rand`` otherwise.  After training, the rows of the
        model's ``E`` and ``R`` are copied into ``self.ent2embed`` and
        ``self.rel2embed``, keyed by the ids in ``id2ent`` / ``id2rel``.
        """
        side = self.side_info
        params = self.p

        n_ent, n_rel = len(side.ent_list), len(side.rel_list)
        xs = side.trpIds
        ys = [1] * len(side.trpIds)
        sz = (n_ent, n_ent, n_rel)

        # Keep only the part of each entity string before the first '|'.
        clean_ent_list = [ent.split('|')[0] for ent in side.ent_list]

        # Initialize embeddings
        if params.embed_init == 'glove':
            vectors = gensim.models.KeyedVectors.load_word2vec_format(
                params.embed_loc, binary=False)
            E_init = getEmbeddings(vectors, clean_ent_list, params.embed_dims)
            R_init = getEmbeddings(vectors, side.rel_list, params.embed_dims)
        else:
            E_init = np.random.rand(len(clean_ent_list), params.embed_dims)
            R_init = np.random.rand(len(side.rel_list), params.embed_dims)

        # Main algorithm: per-source weights handed to CESI as lambd_side.
        lambd_side = {
            'ent_wiki': params.lambd_wiki,
            'ent_ppdb': params.lambd_ppdb,
            'ent_wnet': params.lambd_wnet,
            'ent_morph': params.lambd_morph,
            'ent_idfTok': params.lambd_idfTok,
            'rel_ppdb': params.lambd_ppdb,
            'rel_wnet': params.lambd_wnet,
            'rel_amie': params.lambd_amie,
            'rel_kbp': params.lambd_kbp,
            'rel_morph': params.lambd_morph,
            'rel_idfTok': params.lambd_idfTok,
            'main_obj': params.lambd_main_obj
        }

        # NOTE(review): the sampler is given (N, N, M) while CESI is given
        # (N, M, N); kept exactly as found -- confirm this is intended.
        cesi = CESI((n_ent, n_rel, n_ent),
                    params.embed_dims,
                    lambd=params.lambd,
                    lambd_side=lambd_side,
                    E_init=E_init,
                    R_init=R_init,
                    inp=side)

        # Negative sampling
        sampler = LCWASampler(params.num_neg_samp, [0, 2], xs, sz)

        # Optimizer: both trainers share everything except the margin.
        shared = dict(nbatches=params.nbatches,
                      max_epochs=params.max_epochs,
                      learning_rate=params.lr,
                      af=actfun.Sigmoid,
                      samplef=sampler.sample,
                      post_epoch=[self.epoch_callback])
        if params.trainer == 'stochastic':
            self.trainer = StochasticTrainer(cesi, **shared)
        else:
            self.trainer = PairwiseStochasticTrainer(cesi,
                                                     margin=params.margin,
                                                     **shared)

        self.trainer.fit(xs, ys)

        for ent_id in side.id2ent.keys():
            self.ent2embed[ent_id] = self.trainer.model.E[ent_id]
        for rel_id in side.id2rel.keys():
            self.rel2embed[rel_id] = self.trainer.model.R[rel_id]
Beispiel #8
0
def train_model(method='complex',
                mode='single',
                dimension=200,
                number_of_epochs=300,
                batch_size=128,
                learning_rate=0.05,
                margin=1.0,
                number_negative_samples=10,
                optimzer='adagrad',
                l2_regularization=0.0001,
                gradient_clipping=5,
                epoch_setp_for_saving=100,
                ratio_complex_dimension=0.5):
    """Select a model, an optimizer and a trainer from keyword settings.

    NOTE(review): this function reads as unfinished scaffolding and is
    documented as found, not as intended:

    * It has no ``self`` parameter but reads ``self.shape``, ``self.args``
      and ``self.callback``; it also reads ``af``, ``args``, ``logger``,
      ``evaluator``, ``valid_dat``, ``n_entity`` and ``n_relation``, none of
      which are defined inside the function (they would have to exist at
      module level -- TODO confirm).
    * With the default ``method='complex'`` the first ``if`` chain falls
      through to ``raise NotImplementedError``.
    * ``number_negative_samples``, ``epoch_setp_for_saving`` and
      ``ratio_complex_dimension`` are never read.
    * Nothing is returned; the function ends in ``pass``.

    Raises:
        NotImplementedError: if ``method`` is not 'hole', 'rescal' or
            'transe', or if ``optimzer`` is not 'sgd' or 'adagrad'.
    """

    # NOTE(review): all three branches construct HolE with identical
    # arguments; the 'rescal' and 'transe' branches look like copy-paste
    # placeholders -- confirm the intended model classes.
    if method == 'hole':
        model = HolE(self.shape,
                     self.args.ncomp,
                     init=self.args.init,
                     rparam=self.args.rparam)
    elif method == 'rescal':
        model = HolE(self.shape,
                     self.args.ncomp,
                     init=self.args.init,
                     rparam=self.args.rparam)
    elif method == 'transe':
        model = HolE(self.shape,
                     self.args.ncomp,
                     init=self.args.init,
                     rparam=self.args.rparam)
    else:
        raise NotImplementedError

    # ``opt`` is bound to the name itself (SGD / AdaGrad), not to the result
    # of calling it.
    if optimzer == 'sgd':
        opt = SGD
    elif optimzer == 'adagrad':
        opt = AdaGrad
    else:
        raise NotImplementedError

    # First trainer construction.  Any other ``mode`` value leaves
    # ``trainer`` unassigned here.
    if mode == 'pairwise':
        trainer = PairwiseStochasticTrainer(
            model,
            nbatches=batch_size,
            max_epochs=number_of_epochs,
            #post_epoch=[self.callback],
            learning_rate=learning_rate,
            margin=margin,
            af=af.Sigmoid)
    elif mode == 'single':
        # This branch ignores batch_size, number_of_epochs and learning_rate
        # and uses literal values instead.
        trainer = StochasticTrainer(model,
                                    nbatches=100,
                                    max_epochs=500,
                                    post_epoch=[self.callback],
                                    learning_rate=0.1)

    pass

    # Regularisation / clipping are configured through calls on ``opt``.
    if l2_regularization > 0:
        opt.set_l2_reg(l2_regularization)
    if gradient_clipping > 0:
        opt.set_gradclip(gradient_clipping)

    # From here on ``model`` (and, for the two known modes, ``trainer``) is
    # rebound, discarding what was built above.
    from .kge.transe import TransE
    model = TransE(n_entity=n_entity,
                   n_relation=n_relation,
                   margin=margin,
                   dim=dimension,
                   mode=mode)

    if mode == 'pairwise':
        trainer = PairwiseTrainer(model=model,
                                  opt=opt,
                                  save_step=args.save_step,
                                  batchsize=args.batch,
                                  logger=logger,
                                  evaluator=evaluator,
                                  valid_dat=valid_dat,
                                  n_negative=args.negative,
                                  epoch=args.epoch,
                                  model_dir=args.log)
    elif mode == 'single':
        trainer = SingleTrainer(model=model,
                                opt=opt,
                                save_step=args.save_step,
                                batchsize=args.batch,
                                logger=logger,
                                evaluator=evaluator,
                                valid_dat=valid_dat,
                                n_negative=args.negative,
                                epoch=args.epoch,
                                model_dir=args.log)

    pass
Beispiel #9
0
class SKGEWrapper(ErrorDetector):
    """Error detector that scores triples with a trained embedding model.

    ``model`` selects one of "hole", "transe" or "rescal"; ``sample_mode``
    selects the negative sampler ("corrupted", "random" or "lcwa").  All
    other constructor arguments are stored unchanged and forwarded to the
    model / trainer in :meth:`learn_model`.
    """

    def __init__(self, n_dim=150, n_batches=100, max_epochs=500, learning_rate=0.1, margin=0.2, rparam=0.1,
                 negative_examples=1, init="nunif", activation_function="sigmoid", model="hole", sample_mode="lcwa"):
        self.n_dim = n_dim
        self.n_batches = n_batches
        self.max_epochs = max_epochs
        self.learning_rate = learning_rate
        self.margin = margin
        self.rparam = rparam
        self.negative_examples = negative_examples
        self.init = init
        self.activation_function = activation_function
        self.model = model
        self.sample_mode = sample_mode

    def learn_model(self, X, types=None, type_hierarchy=None, domains=None, ranges=None):
        """Train the configured embedding model on the relation slices in X.

        X is indexed and iterated as a sequence of per-relation slices; each
        slice must expose ``shape``, ``row`` and ``col`` (presumably scipy COO
        matrices -- TODO confirm against the caller).  Every stored entry of
        slice ``r`` becomes one positive triple ``(row, col, r)``.

        The fitted trainer is stored on ``self.trainer``; ``self.X`` and
        ``self.sz`` are set as a side effect.  ``types``, ``type_hierarchy``,
        ``domains`` and ``ranges`` are accepted but not used by this method.

        Raises:
            ValueError: if ``self.model`` or ``self.sample_mode`` is not one
                of the supported names.
        """
        self.X = X
        N = self.X[0].shape[1]
        self.sz = (N, N, len(self.X))

        if self.model == "hole":
            embedding_model = HolE(
                self.sz,
                self.n_dim,
                rparam=self.rparam,
                af=afs[self.activation_function],
                init=self.init
            )
        elif self.model == "transe":
            embedding_model = TransE(self.sz, self.n_dim, init=self.init)
        elif self.model == "rescal":
            embedding_model = RESCAL(self.sz, self.n_dim, init=self.init)
        else:
            # Fail here instead of with an unbound local further down.
            raise ValueError("unknown model: %r" % (self.model,))

        xs = []
        for r, rel_slice in enumerate(self.X):
            heads = rel_slice.row
            tails = rel_slice.col
            # extend() accepts the zip iterator on Python 3 (list + zip is a
            # TypeError there) and avoids re-copying xs for every relation.
            xs.extend(zip(heads, tails, [r] * len(heads)))

        ys = np.ones(len(xs))
        if self.sample_mode == 'corrupted':
            ti = type_index(xs)
            sampler = CorruptedSampler(self.negative_examples, xs, ti)
        elif self.sample_mode == 'random':
            sampler = RandomModeSampler(self.negative_examples, [0, 1], xs, self.sz)
        elif self.sample_mode == 'lcwa':
            sampler = LCWASampler(self.negative_examples, [0, 1, 2], xs, self.sz)
        else:
            raise ValueError("unknown sample_mode: %r" % (self.sample_mode,))

        self.trainer = PairwiseStochasticTrainer(
            embedding_model,
            nbatches=self.n_batches,
            max_epochs=self.max_epochs,
            post_epoch=[callback],
            learning_rate=self.learning_rate,
            margin=self.margin,
            samplef=sampler.sample
        )
        self.trainer.fit(xs, ys)

    def compute_scores(self):
        """Score ``self.true_triples`` with :meth:`predict_proba`.

        NOTE(review): ``true_triples`` is not assigned anywhere in this
        class; presumably it is provided by ErrorDetector -- confirm.
        """
        return self.predict_proba(self.true_triples)

    def detect_errors(self):
        """No-op in this class."""
        pass

    def predict_proba(self, triples):
        """Return one score per triple from the trained model.

        Each triple is unpacked as ``(subject, object, predicate)``.

        * "hole":   ``sum(R[p] * ccorr(E[s], E[o]))`` per triple
        * "transe": negative sum of squares of ``E[s] + R[p] - E[o]``
        * "rescal": ``E[s] . (W[p] . E[o])``

        Returns None when ``self.model`` is none of these.
        """
        sp = [s for s, o, p in triples]
        pp = [p for s, o, p in triples]
        op = [o for s, o, p in triples]
        mdl = self.trainer.model

        if self.model == "hole":
            return np.sum(mdl.R[pp] * ccorr(mdl.E[sp], mdl.E[op]), axis=1)

        if self.model == "transe":
            score = mdl.E[sp] + mdl.R[pp] - mdl.E[op]
            return - np.sum(score ** 2, axis=1)

        if self.model == "rescal":
            return np.array([
                np.dot(mdl.E[s], np.dot(mdl.W[p], mdl.E[o]))
                for s, p, o in zip(sp, pp, op)
            ])

        return None

    def predict(self, triples):
        """Return 1.0 for triples whose score is positive, else 0.0."""
        return (self.predict_proba(triples) > 0).astype(float)

    def prepare(self, mdl, p):
        """Cache the per-relation term used by scores_s / scores_o.

        Stores ``self.ER`` for "hole" and "transe", and ``self.EW`` for
        "rescal", computed from ``mdl`` for relation index ``p``.
        """
        if self.model == "hole":
            self.ER = ccorr(mdl.R[p], mdl.E)
        elif self.model == "transe":
            self.ER = mdl.E + mdl.R[p]
        elif self.model == "rescal":
            # np.asmatrix is the surviving spelling of np.mat, which was
            # removed in NumPy 2.0; the result is the same matrix product.
            self.EW = np.asmatrix(mdl.E) * np.asmatrix(mdl.W[p])

    def scores_s(self, o, p):
        """Score every entity as subject for object ``o``.

        Uses the term cached by :meth:`prepare`; ``p`` itself is not read
        here.  Returns None when ``self.model`` is not a supported name.
        """
        if self.model == "hole":
            return np.dot(self.trainer.model.E, self.ER[o])
        if self.model == "transe":
            return -np.sum(np.abs(self.ER - self.trainer.model.E[o]), axis=1)
        if self.model == "rescal":
            return -np.sum(np.abs(self.EW - self.trainer.model.E[o]), axis=1)
        return None

    def scores_o(self, s, p):
        """Score every entity as object for subject ``s``.

        Uses the term cached by :meth:`prepare`; ``p`` itself is not read
        here.  Returns None when ``self.model`` is not a supported name.
        """
        if self.model == "hole":
            return np.dot(self.ER, self.trainer.model.E[s])
        if self.model == "transe":
            return -np.sum(np.abs(self.ER[s] - self.trainer.model.E), axis=1)
        if self.model == "rescal":
            return -np.sum(np.abs(self.EW[s] - self.trainer.model.E), axis=1)
        return None