def run():
    """Train and test a dot-product embedding model on the nycfilms data.

    Reads ``direct_forward.plustypes.ssd`` through ``readdata``, splits the
    result with ``split((70, 15, 15), random=True)`` into train, validation
    and test parts, and builds a model that looks up column 0 in ``eemb`` and
    column 1 in ``remb``, combines both with ``DotProduct`` and applies
    ``Tanh``. The model is trained with ``HingeLoss`` and SGD, tested on the
    held-out part, and finally ``explore`` and ``embed`` are called.

    Takes no arguments and returns ``None``. Every intermediate object is
    kept as a local on purpose: ``embed()`` is the last call, and if it is
    IPython's ``embed`` (confirm against the file's imports) those locals are
    what the interactive session exposes.
    """
    # params
    dims = 10
    negrate = 1
    batsize = 300
    epochs = 300

    # paths
    datafileprefix = "../../data/nycfilms/"
    dirfwdsuffix = "direct_forward.plustypes.ssd"

    # get the data and split
    # The file is closed as soon as it has been read instead of being leaked.
    # NOTE(review): this relies on readdata() consuming the file before it
    # returns; the immediate .split()/.ix/.values usage suggests it does.
    with open(datafileprefix + dirfwdsuffix) as dirfwdf:
        datadf = readdata(dirfwdf)
    traind, validd, testd = datadf.split((70, 15, 15), random=True)

    # Embedding table sizes: largest id found in each column, plus one.
    numents = int(datadf.ix[:, 0].max()) + 1
    print(numents)
    numrels = int(datadf.ix[:, 1].max()) + 1
    print(numrels)

    # define model
    inp = Input(T.imatrix())

    eemb = VectorEmbed.indim(numents).outdim(dims).Wreg(l2reg(0.00001))()
    remb = VectorEmbed.indim(numrels).outdim(dims).Wreg(l2reg(0.00001))()

    # for debugging: symbolic taps placed inside the pipeline
    eembd = SymTensor(T.fmatrix())
    rembd = SymTensor(T.fmatrix())
    dotp = SymTensor(T.fmatrix())

    out = (
        ((inp[:, 0] >> eemb >> eembd) & (inp[:, 1] >> remb >> rembd))
        >> DotProduct() >> dotp >> Tanh()
    )

    # for plotting purposes: relation to relation dot product (or relation-type)
    r2rinp = Input(T.imatrix())
    rel2rel = ((r2rinp[:, 0] >> remb) & (r2rinp[:, 1] >> remb)) >> DotProduct()

    outtest = Output(T.fvector())

    loss = (out & outtest) >> HingeLoss()
    trainer = (
        Trainer
        .batsize(batsize)
        .epochs(epochs)
        .onrun(getonrun())
        .offrun(offrun)
        .offepoch(getoffepoch(out, rel2rel))
        .onbatch(getonbatch(negrate, numents, numrels))
        .optimizer(sgd(lr=1.))
        .batchtransformer(transbat)
    )
    # The loss is attached in a separate call whose return value is unused,
    # exactly as before.
    trainer.loss(loss)

    trainer.train(traind.values, validd.values).test(testd.values)

    explore(eemb, remb)  # functions for interactive exploration

    embed()
def run():
    """Fit a two-embedding dot-product model on the Family ("SN-") data.

    Data comes from ``family.getdata``. Column 0 of the input is looked up in
    one ``VectorEmbed`` and column 2 in another; the two vectors go through
    ``DotProduct`` and ``Sigmoid``. Training uses ``MSELoss`` with SGD
    (``lr=1.``), after which the trained result is run on the test part.

    Takes no arguments and returns ``None``.
    """
    # hyperparameters
    embdim = 20
    negsamples = 1
    batchsize = 100
    numepochs = 3000

    dataprefix = "../../data/Family/SN-"
    trainset, validset, testset = family.getdata(dataprefix)

    # Both embedding tables are sized from the same count.
    vocabsize = trainset.numsubjs + 2
    leftemb = VectorEmbed.indim(vocabsize).outdim(embdim).Wreg(l2reg(1e-6))()
    rightemb = VectorEmbed.indim(vocabsize).outdim(embdim).Wreg(l2reg(1e-6))()

    # model: sigmoid(dot(leftemb[col 0], rightemb[col 2]))
    idxs = Input(T.imatrix())
    leftvec = idxs[:, 0] >> leftemb
    rightvec = idxs[:, 2] >> rightemb
    dotted = (leftvec & rightvec) >> DotProduct()
    score = dotted >> Sigmoid()

    gold = Output(T.fvector())
    objective = (score & gold) >> MSELoss()

    fitter = (
        Trainer
        .batsize(batchsize)
        .epochs(numepochs)
        .onrun(getonrun())
        .offrun(offrun)
        .offepoch(getoffepoch(score))
        .onbatch(getonbatch(negsamples))
        .optimizer(sgd(lr=1.))
        .batchtransformer(transbat)
    )
    # The loss is registered in its own call; its return value is unused.
    fitter.loss(objective)

    trained = fitter.train(trainset, validset)
    trained.test(testset)
def run():
    """Fit a model with a matrix relation embedding on the Family data.

    Data comes from ``family.getdata``. Column 0 is looked up in ``semb``;
    that vector, paired with column 1, is fed through a ``MatrixEmbed``; the
    result is combined with the ``oemb`` lookup of column 2 via ``DotProduct``
    and ``Sigmoid``. The untrained model is evaluated once on a fixed sample
    and printed, then the model is trained with ``MSELoss`` and SGD and run
    on the test part.

    Takes no arguments and returns ``None``.
    """
    # hyperparameters
    embdim = 60
    negsamples = 1
    batchsize = 100
    numepochs = 800

    dataprefix = "../../data/Family/SN-"
    trainset, validset, testset = family.getdata(dataprefix)

    # Size of both vector tables; also used as the index offset of the
    # matrix embedding.
    vocabsize = trainset.numsubjs + 2
    subjemb = VectorEmbed.indim(vocabsize).outdim(embdim).Wreg(l2reg(1e-9))()
    objemb = VectorEmbed.indim(vocabsize).outdim(embdim).Wreg(l2reg(1e-9))()
    relemb = (
        MatrixEmbed
        .indim(trainset.numrels)
        .outdim(embdim)
        .idxoffset(vocabsize)
        .Wreg(l2reg(1e-9))
    )()

    idxs = Input(T.imatrix())
    subjvec = idxs[:, 0] >> subjemb
    subjrel = (idxs[:, 1] & subjvec) >> relemb
    objvec = idxs[:, 2] >> objemb
    dotted = (subjrel & objvec) >> DotProduct()
    score = dotted >> Sigmoid()

    # Evaluate the untrained model on one hard-coded sample.
    sample = np.asarray([[1, 720, 6]], dtype="int32")
    print(score(sample))

    gold = Output(T.fvector())
    objective = (score & gold) >> MSELoss()

    fitter = (
        Trainer
        .batsize(batchsize)
        .epochs(numepochs)
        .onrun(getonrun())
        .offrun(offrun)
        .offepoch(getoffepoch(score))
        .onbatch(getonbatch(negsamples))
        .optimizer(sgd(lr=1.))
        .batchtransformer(transbat)
    )
    # The loss is registered in its own call; its return value is unused.
    fitter.loss(objective)

    trained = fitter.train(trainset, validset)
    trained.test(testset)
def run():
    """Train and test a switched-relation embedding model on nycfilms data.

    Reads ``direct_both.ssd`` through ``readdata`` and splits it with
    ``split((70, 15, 15), random=True)``. Column 0 is looked up in ``eemb``;
    columns 1 and 2 together are fed to a ``Switcher`` over two relation
    embeddings (``rembfwd`` and ``rembrev``). Both sides are combined with
    ``DotProduct`` and ``Tanh``, trained with ``HingeLoss`` and SGD, and
    tested on the held-out part. Finally ``explore`` and ``embed`` are
    called.

    Takes no arguments and returns ``None``. Every intermediate object is
    kept as a local on purpose: ``embed()`` is the last call, and if it is
    IPython's ``embed`` (confirm against the file's imports) those locals are
    what the interactive session exposes.
    """
    # params
    dims = 10
    negrate = 1
    batsize = 300
    epochs = 300
    lr = 0.3
    wreg = 0.000001

    # paths
    datafileprefix = "../../data/nycfilms/"
    dirfwdsuffix = "direct_both.ssd"

    # get the data and split
    # The file is closed as soon as it has been read instead of being leaked.
    # NOTE(review): this relies on readdata() consuming the file before it
    # returns; the immediate .split()/.ix/.values usage suggests it does.
    with open(datafileprefix + dirfwdsuffix) as dirfwdf:
        datadf = readdata(dirfwdf)
    traind, validd, testd = datadf.split((70, 15, 15), random=True)

    # Embedding table sizes: largest id found in each column, plus one.
    numents = int(datadf.ix[:, 0].max()) + 1
    numrels = int(datadf.ix[:, 1].max()) + 1

    # test switcher (disabled; previously kept as a bare string literal)
    # inp = Input(T.fmatrix())
    # outp = (inp[:, 0] & inp[:, 1]) >> Switcher(Sigmoid(), Ident())
    # res = outp(np.asarray([[-10, 0], [-10, 1]], dtype="float32"))
    # print res
    # print "tested switcher"

    # define model
    inp = Input(T.imatrix())

    # The entity table gets a ten times smaller L2 weight than the relations.
    eemb = VectorEmbed.indim(numents).outdim(dims).Wreg(l2reg(wreg / 10.))()
    rembfwd = VectorEmbed.indim(numrels).outdim(dims).Wreg(l2reg(wreg))()
    rembrev = VectorEmbed.indim(numrels).outdim(dims).Wreg(l2reg(wreg))()
    remb = Switcher(rembfwd, rembrev)

    # for debugging: symbolic taps placed inside the pipeline
    eembd = SymTensor(T.fmatrix())
    rembd = SymTensor(T.fmatrix())
    dotp = SymTensor(T.fmatrix())

    out = (
        ((inp[:, 0] >> eemb >> eembd)
         & ((inp[:, 1] & inp[:, 2]) >> remb >> rembd))
        >> DotProduct() >> dotp >> Tanh()
    )

    outtest = Output(T.fvector())

    loss = (out & outtest) >> HingeLoss()
    trainer = (
        Trainer
        .batsize(batsize)
        .epochs(epochs)
        .onrun(getonrun())
        .offrun(offrun)
        .offepoch(getoffepoch(out))
        .onbatch(getonbatch(negrate, numents, numrels))
        .optimizer(sgd(lr=lr))
        .batchtransformer(transbat)
    )
    # The loss is attached in a separate call whose return value is unused,
    # exactly as before.
    trainer.loss(loss)

    trainer.train(traind.values, validd.values).test(testd.values)

    explore(eemb, rembfwd, rembrev)  # functions for interactive exploration

    embed()
def run():
    """Factorise an entity-by-relation count matrix from the nycfilms data.

    Unpickles ``direct_both_typ.pd.pkl``, turns its ``"e"`` / ``"r"`` columns
    into a count matrix indexed by ``"e"``, and trains a model whose output
    is the ``DotProduct`` of an ``eemb`` lookup (input column 0) and a
    ``remb`` lookup (input column 1). Training uses ``SSELoss``, SGD and a
    ``SimpleRandomIdxBatcher``. Finally ``explore`` and ``embed`` are called.

    Takes no arguments and returns ``None``. Every intermediate object is
    kept as a local on purpose: ``embed()`` is the last call, and if it is
    IPython's ``embed`` (confirm against the file's imports) those locals are
    what the interactive session exposes.
    """
    # params
    dims = 15
    numbats = 1000
    epochs = 20
    wreg = 0.00000001
    lr = 0.0001

    # paths
    datafileprefix = "../../data/nycfilms/"
    dirfwdsuffix = "direct_both_typ.pd.pkl"

    # get the data and split
    # The file is closed right after unpickling instead of being leaked.
    # NOTE(review): unpickling executes code from the file, so only load
    # trusted data. The open mode is left unchanged; the pickle docs
    # recommend binary mode ("rb") -- confirm before switching.
    with open(datafileprefix + dirfwdsuffix) as srcfile:
        srcdf = pickle.load(srcfile)

    def pairstomatrix(srcd):
        """Build a count matrix from the "r" column of *srcd*, indexed by "e".

        Each "r" value is counted with ``Counter``; every distinct counted
        item becomes a column, and missing counts are filled with 0.
        """
        datamatdf = pd.DataFrame.from_records(
            srcd["r"].apply(Counter)).fillna(value=0)
        datamatdf["e"] = srcd["e"]
        datamatdf.set_index("e", inplace=True)
        return datamatdf

    datamatdf = pairstomatrix(srcdf)

    # build intermediate dics
    # position -> label for rows and columns, and the reverse mappings
    imentdic = dict(zip(range(len(datamatdf.index.values)),
                        datamatdf.index.values))
    imreldic = dict(zip(range(len(datamatdf.columns)), datamatdf.columns))
    revimentdic = {v: k for k, v in imentdic.items()}
    revimreldic = {v: k for k, v in imreldic.items()}

    numents = datamatdf.shape[0]
    print(numents)
    numrels = datamatdf.shape[1]
    print(numrels)

    # define model
    inp = Input(T.imatrix())

    eemb = VectorEmbed.indim(numents).outdim(dims).Wreg(l2reg(wreg))()
    remb = VectorEmbed.indim(numrels).outdim(dims).Wreg(l2reg(wreg))()

    # for debugging: symbolic taps placed inside the pipeline
    eembd = SymTensor(T.fmatrix())
    rembd = SymTensor(T.fmatrix())

    out = (
        ((inp[:, 0] >> eemb >> eembd) & (inp[:, 1] >> remb >> rembd))
        >> DotProduct()
    )

    # for plotting purposes: relation to relation dot product (or relation-type)
    r2rinp = Input(T.imatrix())
    rel2rel = ((r2rinp[:, 0] >> remb) & (r2rinp[:, 1] >> remb)) >> DotProduct()

    outtest = Output(T.fvector())

    loss = (out & outtest) >> SSELoss()

    sampler = SimpleRandomIdxBatcher(numbats=numbats)

    trainer = (
        Trainer
        .loss(loss)
        .batcher(sampler)
        .epochs(epochs)
        .optimizer(sgd(lr=lr))
        .onrun(getonrun())
        .offrun(offrun)
        .offepoch(getoffepoch(out, rel2rel))
    )

    trainer.train(datamatdf.values)

    # The two lambdas map a row / column label to its position in the matrix.
    explore(eemb, remb, lambda x: revimentdic[x], lambda x: revimreldic[x])
    # functions for interactive exploration

    embed()