def makeranks(df, model, entidxs, relidxs):
    """Compute one ranking per row of *df*.

    Parameters
    ----------
    df : pandas.DataFrame
        Each row is handed to ``makerank`` in turn.
    model :
        Passed through to ``makerank`` — presumably a trained KB-embedding
        model; TODO confirm against ``makerank``'s signature.
    entidxs :
        Candidate entity indices, passed through to ``makerank``.
    relidxs :
        Unused by this function; kept so existing call sites keep working.

    Returns
    -------
    list
        One ranking (whatever ``makerank`` returns) per row of *df*.
    """
    tt = TT("Ranker")
    tt.tick("making ranks")
    # Comprehension instead of the manual append loop; iterrows() yields
    # (index, row) pairs and only the row itself is needed.
    rankings = [makerank(row, model, entidxs) for _, row in df.iterrows()]
    tt.tock("made ranks")
    return rankings
def evaluate(self, model, data, labels):
    """Score *model* on (*data*, *labels*) with every configured metric.

    Predicts once over the whole batch, feeds each (gold, predicted) pair
    to every metric accumulator, then reads out each metric's aggregate.
    Returns a dict mapping metric name to its final value.
    """
    tt = TT("Evaluator")
    predictions = model.predict(data)
    num_examples = data.shape[0]
    # Accumulate every example into every metric.
    for idx in range(num_examples):
        for metric in self.metrics:
            metric(labels[idx], predictions[idx])
    # A metric called with no arguments reports its aggregate value.
    results = {metric.name: metric() for metric in self.metrics}
    tt.tock("computed")
    return results
def evaluate(self, model, data, labels):
    """Rank-based evaluation: build a candidate ranking for each example
    and feed it, with the gold label, to every configured metric.

    Returns a dict mapping metric name to its aggregate value.
    """
    tt = TT("Evaluator")
    # Candidate pool = all entities seen in the first data column plus
    # all gold labels.
    entidxs = set(data[:, 0]).union(set(labels))
    total = data.shape[0]
    for idx in range(total):
        ranking = self.makerank(data[idx], model, entidxs)
        tt.progress(idx, total)
        for metric in self.metrics:
            # Metrics expect the gold label(s) as a list.
            metric([labels[idx]], ranking)
    results = {metric.name: metric() for metric in self.metrics}
    tt.tock("computed")
    return results
def evaluate(data, model, *metrics):
    """Evaluate *model* on triple data, one ranking per (s, r) group.

    Parameters
    ----------
    data :
        Array-like of (subject, relation, object) index triples; must be
        convertible to a 3-column DataFrame.
    model :
        Passed through to ``makerank`` — presumably a trained KB-embedding
        model; TODO confirm against ``makerank``'s signature.
    *metrics :
        Callable accumulators with a ``.name`` attribute; calling with no
        arguments yields the aggregate value.

    Returns
    -------
    dict
        Metric name -> aggregate value.
    """
    tt = TT("Evaluator")
    datadf = pd.DataFrame(data)
    datadf.columns = ["s", "r", "o"]
    # One row per (subject, relation) pair; the extra column holds the set
    # of gold objects observed for that pair.
    datadfgb = datadf.groupby(by=["s", "r"]).apply(
        lambda x: set(x["o"].unique())).reset_index()
    # Fix: the original called datadf.describe() here and discarded the
    # result — pure dead work, removed.
    # Candidate pool = every entity appearing in subject or object position.
    entidxs = set(datadf["s"].unique()).union(set(datadf["o"].unique()))
    tt.tock("initialized").tick()
    for n, row in datadfgb.iterrows():
        ranking = makerank(row, model, entidxs)
        tt.progress(n, datadfgb.shape[0])
        for metric in metrics:
            metric(row, ranking)
    results = {metric.name: metric() for metric in metrics}
    tt.tock("computed")
    return results
def evaluate(data, model, *metrics):
    """Evaluate *model* with *metrics* over triple data grouped by (s, r).

    Builds one candidate ranking per (subject, relation) group and feeds it
    to every metric; returns a dict of metric name -> aggregate value.
    """
    tt = TT("Evaluator")
    triples = pd.DataFrame(data)
    triples.columns = ["s", "r", "o"]
    # Collapse each (subject, relation) pair into the set of its gold objects.
    grouped = triples.groupby(
        by=["s", "r"]).apply(lambda g: set(g["o"].unique())).reset_index()
    triples.describe()  # NOTE(review): result discarded — kept as-is
    # All entities seen as subject or object form the candidate pool.
    entidxs = set(triples["s"].unique()).union(set(triples["o"].unique()))
    results = {}
    tt.tock("initialized").tick()
    total = grouped.shape[0]
    for rowidx, grouprow in grouped.iterrows():
        ranking = makerank(grouprow, model, entidxs)
        tt.progress(rowidx, total)
        for metric in metrics:
            metric(grouprow, ranking)
    # A metric called with no arguments reports its aggregate value.
    for metric in metrics:
        results[metric.name] = metric()
    tt.tock("computed")
    return results
def traincompat(entemb, erfile):
    """Train a FixedEntCompat model on entity/relation compatibility data
    and plot the training error curve.

    entemb: (vocabsize, dim) matrix of entity embeddings
    erfile: path to file containing which entity has which relation
    """
    # params -- hyperparameters; inline comments preserve earlier values tried
    negrate = 3
    numbats = 100 # 100
    epochs = 200 #20
    wreg = 0.0000001
    evalinter = 1
    lr = 0.001/numbats # 8  -- learning rate scaled by batch count
    toy = False
    tt = TT("data")
    if toy:
        # Toy configuration: small synthetic dataset, no regularization.
        # NOTE(review): dims and tensorfile are set here but never read
        # below — loaddata uses erfile, not tensorfile. Confirm the toy
        # path was ever exercised.
        dims = 10
        numbats=10
        wreg = 0.0
        lr=0.1/numbats
        datafileprefix = "../../data/"
        tensorfile = "toy.ssd"
        vocabsize=11
        epochs=100
    else:
        # get the data and split
        datafileprefix = "../../data/nycfilms/triples.flat/"
        fulldic = loaddic(datafileprefix+"compatreldic.flatidx.pkl")
        vocabsize = len(fulldic)
    data = loaddata(datafileprefix+erfile)
    # .keys.lok — presumably extracts an index matrix from the loaded
    # structure; TODO confirm against loaddata's return type.
    data = data.keys.lok
    trainX = data[:, :1]   # first column as input
    labels = data[:, 1]    # second column as target
    tt.tock("loaded")
    # train model
    # Fluent composition: .autosave.normalize configure the model, and
    # `+ SGD(...)` attaches the optimizer (overloaded operator in the
    # project's framework — not numeric addition).
    model = FixedEntCompat(entembs=entemb, vocabsize=vocabsize, maxiter=epochs, wreg=wreg, numbats=numbats, negrate=negrate)\
        .autosave.normalize \
        + SGD(lr=lr)
    err = model.train(trainX, labels, evalinter=evalinter)
    # Show the training-error curve (blocking so the window stays open).
    plt.plot(err, "r")
    plt.show(block=True)
def run():
    """Train an AddEKMM knowledge-base-completion model, evaluate it with
    recall/quantile metrics on a held-out tensor, and plot error curves.

    NOTE(review): uses Python 2 print statements near the end.
    """
    # params -- first assignment block is overridden a few lines below;
    # inline comments preserve earlier values tried
    dims = 100 # 100
    innerdims = dims
    negrate = 10
    numbats = 100 # 100
    epochs = 100
    wreg = 0.0000001
    lr = 0.01/numbats #0.0001 # for SGD
    lr2 = 1.   # NOTE(review): lr2, rho, threshold, innerdim2 are never read below
    evalinter = 1
    rho = 0.95
    ############"
    # Effective overrides (these win over the values above):
    dims = 20
    innerdims = dims#50
    lr = 0.001/numbats # 8
    toy = False
    threshold = 0.5
    #paths
    datatt = TT("data")
    # get the data and split
    datafileprefix = "../../data/nycfilms/triples.flat/"
    tensorfile = "alltripletensor.train.flat.ssd"
    testtensorfile = "alltripletensor.test.flat.ssd"
    fulldic = loaddic(datafileprefix+"tripletensor.flatidx.pkl")
    vocabsize = len(fulldic)
    numrels = 20
    innerdim2 = 20
    data = loaddata(datafileprefix+tensorfile)
    # .keys.lok — presumably extracts an index matrix from the loaded
    # structure; TODO confirm against loaddata's return type.
    data = data.keys.lok
    trainX = data[:, :2]    # (subject, relation) index pairs
    labels = data[:, -1]    # object index as target
    # labels = data[:, 1:]
    datatt.tock("loaded")
    validsplit = 10 # 50
    # train model
    # The factory lambda builds model + optimizer per fold; `+ SGD(...)`
    # attaches the optimizer via an overloaded operator in the project's
    # framework. The FullEGRU component is left commented out.
    trainer = Trainer(lambda:
        AddEKMM(numrels=numrels, dim=dims, vocabsize=vocabsize, maxiter=epochs, wreg=wreg, numbats=numbats, negrate=negrate).normalize#.autosave
        + SGD(lr=lr)
        #+ FullEGRU(dim=numrels, innerdim=innerdims, wreg=wreg, nobias=True)# nobias=True,, outpactivation=lambda x: x)
    )
    models, err, verr, _, _, _ = trainer.train(trainX, labels, validinter=evalinter, validsplit=validsplit, validrandom=123, folds=1)
    for model in models:
        # Evaluate each trained fold on the held-out test tensor.
        evaluation = KBCEvaluator(RecallAt(10), RecallAt(15), RecallAt(30), MeanQuantile())
        d = loaddata(datafileprefix+testtensorfile).keys.lok
        res = evaluation.run(model, d[:, :2], d[:, 2])
        print(res)
        # evaluation.save(res) TODO
        # Spot-check predictions on two hard-coded example triples.
        print model.predict([417], [[11307]], [9145])
        print model.predict([417], [[11307]], [9156])
    # Training error in red; validation error in green when available.
    plt.plot(err, "r")
    if len(verr) > 0:
        plt.plot(verr, "g")
    plt.show(block=True)