Esempio n. 1
0
def run():
    """Train and explore a dot-product entity/relation embedding model.

    Loads ``direct_forward.plustypes.ssd`` from the nycfilms data directory,
    splits it into three parts (train/validation/test), wires up the
    embedding model with a hinge loss, trains it with SGD, and finally hands
    the learned embeddings to ``explore`` before calling ``embed()``.

    Local variable names are kept stable on purpose: ``embed()`` is called at
    the end, presumably to open an interactive shell over these locals.
    """
    # params
    dims = 10
    negrate = 1
    batsize = 300
    epochs = 300

    # paths
    datafileprefix = "../../data/nycfilms/"
    dirfwdsuffix = "direct_forward.plustypes.ssd"

    # get the data and split
    # The context manager closes the handle as soon as the data is read.
    # NOTE(review): assumes readdata() consumes the file eagerly -- confirm.
    with open(datafileprefix + dirfwdsuffix) as dirfwdf:
        datadf = readdata(dirfwdf)
    traind, validd, testd = datadf.split((70, 15, 15), random=True)

    # embedding table sizes: largest id found in the column, plus one
    # (print calls are parenthesised so they behave the same on Python 2 and 3)
    numents = int(datadf.ix[:, 0].max()) + 1
    print(numents)
    numrels = int(datadf.ix[:, 1].max()) + 1
    print(numrels)

    # define model
    inp = Input(T.imatrix())

    eemb = VectorEmbed.indim(numents).outdim(dims).Wreg(l2reg(0.00001))()
    remb = VectorEmbed.indim(numrels).outdim(dims).Wreg(l2reg(0.00001))()

    # for debugging
    eembd = SymTensor(T.fmatrix())
    rembd = SymTensor(T.fmatrix())
    dotp = SymTensor(T.fmatrix())

    # column 0 -> entity embedding, column 1 -> relation embedding,
    # dot product of both squashed through tanh
    out = (
        ((inp[:, 0] >> eemb >> eembd) & (inp[:, 1] >> remb >> rembd))
        >> DotProduct() >> dotp >> Tanh()
    )

    # for plotting purposes: relation to relation dot product (or relation-type)
    r2rinp = Input(T.imatrix())
    rel2rel = ((r2rinp[:, 0] >> remb) & (r2rinp[:, 1] >> remb)) >> DotProduct()

    outtest = Output(T.fvector())

    loss = (out & outtest) >> HingeLoss()
    trainer = (
        Trainer
        .batsize(batsize)
        .epochs(epochs)
        .onrun(getonrun())
        .offrun(offrun)
        .offepoch(getoffepoch(out, rel2rel))
        .onbatch(getonbatch(negrate, numents, numrels))
        .optimizer(sgd(lr=1.))
        .batchtransformer(transbat)
    )
    # The loss is attached in a separate statement, as in the original; the
    # dangling line continuation that used to follow it has been removed.
    trainer.loss(loss)

    trainer.train(traind.values, validd.values).test(testd.values)

    explore(eemb, remb)
    # functions for interactive exploration

    embed()
Esempio n. 2
0
def run():
    """Fit a two-table dot-product model on the Family data.

    Columns 0 and 2 of each input row are looked up in two separate
    embedding tables; the dot product of the two vectors goes through a
    sigmoid and is combined with the expected output in an MSE loss.
    """
    # hyperparameters
    embdim = 20
    negsamples = 1
    batchsize = 100
    numepochs = 3000
    regweight = 1e-6

    trainset, validset, testset = family.getdata("../../data/Family/SN-")

    # both tables are sized from the training set's subject count, plus two
    tablesize = trainset.numsubjs + 2
    leftemb = VectorEmbed.indim(tablesize).outdim(embdim).Wreg(l2reg(regweight))()
    rightemb = VectorEmbed.indim(tablesize).outdim(embdim).Wreg(l2reg(regweight))()

    rows = Input(T.imatrix())

    # model: sigmoid(<left[col 0], right[col 2]>)
    leftvec = rows[:, 0] >> leftemb
    rightvec = rows[:, 2] >> rightemb
    score = (leftvec & rightvec) >> DotProduct() >> Sigmoid()

    expected = Output(T.fvector())
    objective = (score & expected) >> MSELoss()

    trainer = (
        Trainer
        .batsize(batchsize)
        .epochs(numepochs)
        .onrun(getonrun())
        .offrun(offrun)
        .offepoch(getoffepoch(score))
        .onbatch(getonbatch(negsamples))
        .optimizer(sgd(lr=1.))
        .batchtransformer(transbat)
    )
    trainer.loss(objective)

    trainer.train(trainset, validset).test(testset)
Esempio n. 3
0
def run():
    """Fit a subject/relation/object model on the Family data.

    The subject (column 0) is embedded as a vector, combined with the
    relation index (column 1) through a ``MatrixEmbed``, and the result is
    dotted with the object embedding (column 2) and passed through a
    sigmoid. The model is trained with an MSE loss and SGD.
    """
    # hyperparameters
    embdim = 60
    negsamples = 1
    batchsize = 100
    numepochs = 800
    regweight = 1e-9

    trainset, validset, testset = family.getdata("../../data/Family/SN-")

    # subject/object tables are sized from the subject count, plus two; the
    # same number is passed to the relation table as its index offset
    tablesize = trainset.numsubjs + 2
    subjemb = VectorEmbed.indim(tablesize).outdim(embdim).Wreg(l2reg(regweight))()
    objemb = VectorEmbed.indim(tablesize).outdim(embdim).Wreg(l2reg(regweight))()
    relemb = (
        MatrixEmbed
        .indim(trainset.numrels)
        .outdim(embdim)
        .idxoffset(tablesize)
        .Wreg(l2reg(regweight))()
    )

    rows = Input(T.imatrix())

    subjvec = rows[:, 0] >> subjemb
    subjrel = (rows[:, 1] & subjvec) >> relemb
    objvec = rows[:, 2] >> objemb
    score = (subjrel & objvec) >> DotProduct() >> Sigmoid()

    # quick check: run the untrained model on a single hand-written row
    probe = np.asarray([[1, 720, 6]], dtype="int32")
    print(score(probe))

    expected = Output(T.fvector())
    objective = (score & expected) >> MSELoss()

    trainer = (
        Trainer
        .batsize(batchsize)
        .epochs(numepochs)
        .onrun(getonrun())
        .offrun(offrun)
        .offepoch(getoffepoch(score))
        .onbatch(getonbatch(negsamples))
        .optimizer(sgd(lr=1.))
        .batchtransformer(transbat)
    )
    trainer.loss(objective)

    trainer.train(trainset, validset).test(testset)
Esempio n. 4
0
def run():
    """Train and explore a direction-aware entity/relation embedding model.

    Loads ``direct_both.ssd`` from the nycfilms data directory, splits it
    into three parts (train/validation/test) and trains a dot-product model
    in which the relation embedding is chosen by a ``Switcher`` between a
    forward and a reverse table. After training, the embeddings are passed
    to ``explore`` and ``embed()`` is called.

    Local variable names are kept stable on purpose: ``embed()`` is called at
    the end, presumably to open an interactive shell over these locals.
    """
    # params
    dims = 10
    negrate = 1
    batsize = 300
    epochs = 300
    lr = 0.3
    wreg = 0.000001

    # paths
    datafileprefix = "../../data/nycfilms/"
    dirfwdsuffix = "direct_both.ssd"

    # get the data and split
    # The context manager closes the handle as soon as the data is read.
    # NOTE(review): assumes readdata() consumes the file eagerly -- confirm.
    with open(datafileprefix + dirfwdsuffix) as dirfwdf:
        datadf = readdata(dirfwdf)
    traind, validd, testd = datadf.split((70, 15, 15), random=True)

    # embedding table sizes: largest id found in the column, plus one
    numents = int(datadf.ix[:, 0].max()) + 1
    numrels = int(datadf.ix[:, 1].max()) + 1

    # test switcher (disabled; kept for reference)
    # inp = Input(T.fmatrix())
    # outp = (inp[:, 0] & inp[:, 1]) >> Switcher(Sigmoid(), Ident())
    # res = outp(np.asarray([[-10, 0], [-10, 1]], dtype="float32"))
    # print res
    #
    # print "tested switcher"

    # define model
    inp = Input(T.imatrix())

    # entity table is regularised ten times more weakly than the relation tables
    eemb = VectorEmbed.indim(numents).outdim(dims).Wreg(l2reg(wreg/10.))()
    rembfwd = VectorEmbed.indim(numrels).outdim(dims).Wreg(l2reg(wreg))()
    rembrev = VectorEmbed.indim(numrels).outdim(dims).Wreg(l2reg(wreg))()
    remb = Switcher(rembfwd, rembrev)

    # for debugging
    eembd = SymTensor(T.fmatrix())
    rembd = SymTensor(T.fmatrix())
    dotp = SymTensor(T.fmatrix())

    # column 0 -> entity embedding; columns 1 and 2 together feed the
    # switched relation embedding; dot product squashed through tanh
    out = (
        ((inp[:, 0] >> eemb >> eembd) & ((inp[:, 1] & inp[:, 2]) >> remb >> rembd))
        >> DotProduct() >> dotp >> Tanh()
    )

    outtest = Output(T.fvector())

    loss = (out & outtest) >> HingeLoss()
    trainer = (
        Trainer
        .batsize(batsize)
        .epochs(epochs)
        .onrun(getonrun())
        .offrun(offrun)
        .offepoch(getoffepoch(out))
        .onbatch(getonbatch(negrate, numents, numrels))
        .optimizer(sgd(lr=lr))
        .batchtransformer(transbat)
    )
    # The loss is attached in a separate statement, as in the original; the
    # dangling line continuation that used to follow it has been removed.
    trainer.loss(loss)

    trainer.train(traind.values, validd.values).test(testd.values)

    explore(eemb, rembfwd, rembrev)
    # functions for interactive exploration

    embed()
Esempio n. 5
0
def run():
    """Factorise an entity-by-relation count matrix with dot-product embeddings.

    Loads the pickled ``direct_both_typ.pd.pkl`` source, turns its ``"r"``
    column into a count matrix indexed by ``"e"``, and trains entity and
    relation embeddings with an SSE loss and SGD. After training, the
    embeddings and two lookup functions are passed to ``explore`` and
    ``embed()`` is called.

    Local variable names are kept stable on purpose: ``embed()`` is called at
    the end, presumably to open an interactive shell over these locals.
    """
    # params
    dims = 15
    numbats = 1000
    epochs = 20
    wreg = 0.00000001
    lr = 0.0001

    # paths
    datafileprefix = "../../data/nycfilms/"
    dirfwdsuffix = "direct_both_typ.pd.pkl"

    # get the data and split
    # Pickles are binary: open in "rb" and close the handle deterministically.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(datafileprefix + dirfwdsuffix, "rb") as srcfile:
        srcdf = pickle.load(srcfile)
    #datadf = readdata(srcdf)

    def pairstomatrix(srcd):
        """Build a count matrix from ``srcd``.

        Each entry of ``srcd["r"]`` is counted with ``Counter``; the counts
        become the columns (missing counts filled with 0) and ``srcd["e"]``
        becomes the index.
        """
        datamatdf = pd.DataFrame.from_records(srcd["r"].apply(Counter)).fillna(value=0)
        datamatdf["e"] = srcd["e"]
        datamatdf.set_index("e", inplace=True)
        return datamatdf

    datamatdf = pairstomatrix(srcdf)

    # build intermediate dics: position -> label, and the reverse mappings
    imentdic = dict(enumerate(datamatdf.index.values))
    imreldic = dict(enumerate(datamatdf.columns))
    revimentdic = {v: k for k, v in imentdic.items()}
    revimreldic = {v: k for k, v in imreldic.items()}

    # (print calls are parenthesised so they behave the same on Python 2 and 3)
    numents = datamatdf.shape[0]
    print(numents)
    numrels = datamatdf.shape[1]
    print(numrels)

    # define model
    inp = Input(T.imatrix())

    eemb = VectorEmbed.indim(numents).outdim(dims).Wreg(l2reg(wreg))()
    remb = VectorEmbed.indim(numrels).outdim(dims).Wreg(l2reg(wreg))()

    # for debugging
    eembd = SymTensor(T.fmatrix())
    rembd = SymTensor(T.fmatrix())

    out = ((inp[:, 0] >> eemb >> eembd) & (inp[:, 1] >> remb >> rembd)) >> DotProduct()

    # for plotting purposes: relation to relation dot product (or relation-type)
    r2rinp = Input(T.imatrix())
    rel2rel = ((r2rinp[:, 0] >> remb) & (r2rinp[:, 1] >> remb)) >> DotProduct()

    outtest = Output(T.fvector())

    loss = (out & outtest) >> SSELoss()
    sampler = SimpleRandomIdxBatcher(numbats=numbats)
    # parenthesised chain replaces the backslash continuations, including the
    # dangling one that used to follow the last call
    trainer = (
        Trainer
        .loss(loss)
        .batcher(sampler)
        .epochs(epochs)
        .optimizer(sgd(lr=lr))
        .onrun(getonrun())
        .offrun(offrun)
        .offepoch(getoffepoch(out, rel2rel))
    )

    trainer.train(datamatdf.values)

    explore(eemb, remb, lambda x: revimentdic[x], lambda x: revimreldic[x])
    # functions for interactive exploration

    embed()