Exemple #1
0
        # Fragment (scraped snippet): the enclosing function header and the
        # opening `if` branch are missing from this chunk.
        # Show a sample prediction: map argmax class indices back to entity
        # labels. NOTE(review): `reventdic` looks like an id->entity reverse
        # dict -- confirm against the (missing) setup code.
        print np.vectorize(lambda x: reventdic[x] if x in reventdic else None)(
            np.argmax(pred, axis=1) - 1)
        tt.tock("predicted sample")
        tt.tick("training")
        # Train on a fixed 150-sample slice with Adagrad + grad-norm clipping.
        m.train([lexdata[1:151]], entids[1:151]).adagrad(lr=lr).cross_entropy().grad_total_norm(0.5)\
            .split_validate(5, random=True).validinter(validinter).accuracy()\
            .train(numbats, epochs)
    else:
        #embed()
        tt.tick("predicting")
        print traindata[:5].shape, outdata[:5].shape
        pred = m.predict(traindata[:5], outdata[:5])
        # The -1 presumably undoes a +1 padding/label offset -- TODO confirm.
        print np.argmax(pred, axis=2) - 1
        print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
        tt.tock("predicted sample")

        tt.tick("training")
        # Sequence variant: per-timestep seq_cross_entropy / seq_accuracy.
        m.train([traindata, outdata], golddata).adagrad(lr=lr).grad_total_norm(gradnorm).seq_cross_entropy()\
            .split_validate(splits=5, random=False).validinter(validinter).seq_accuracy().seq_cross_entropy()\
            .train(numbats, epochs)
        #embed()

        tt.tock("trained").tick("predicting")
        pred = m.predict(traindata[:50], outdata[:50])
        print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
        tt.tock("predicted sample")


if __name__ == "__main__":
    argprun(run, model="mem att")
Exemple #2
0
    # Fragment: attention-based encoder-decoder for sequence labeling;
    # the enclosing function header is missing from this chunk.
    m = SimpleSeqEncDecAtt(
        inpvocsize=numwords,
        inpembdim=wordembdim,
        outvocsize=numlabels,
        outembdim=lablembdim,
        encdim=innerdim,
        decdim=innerdim,
        attdim=attdim,
        inconcat=False
    )

    # training
    # Teacher forcing: gold sequence shifted right is fed as decoder input.
    m.train([traindata, shiftdata(traingold), trainmask], traingold).adagrad(lr=lr).grad_total_norm(1.).seq_cross_entropy().l2(wreg)\
        .validate_on([testdata, shiftdata(testgold), testmask], testgold).seq_cross_entropy().seq_accuracy().validinter(validinter)\
        .train(numbats, epochs)

    # predict after training
    s = SeqEncDecAttSearch(m)
    testpred = s.decode(testdata)
    # Zero out predictions at padded positions.
    testpred = testpred * testmask
    #testpredprobs = m.predict(testdata, shiftdata(testgold), testmask)
    #testpred = np.argmax(testpredprobs, axis=2)-1
    #testpred = testpred * testmask
    #print np.vectorize(lambda x: label2idxrev[x] if x > -1 else " ")(testpred)

    # ATIS slot-filling evaluation; -1 presumably undoes a label offset -- TODO confirm.
    evalres = atiseval(testpred-1, testgold-1, label2idxrev); print evalres


if __name__ == "__main__":
    argprun(run, epochs=1)
    # Fragment boundary artifact: the lines below come from a different
    # snippet but sit at the same indent as the __main__ guard above.
    # Build reverse entity dict: id -> entity name.
    for k, v in entdic.items():
        reventdic[v] = k


    # embed()
    # Decoder input = gold shifted right (teacher forcing).
    outdata = shiftdata(golddata)

    tt.tick("predicting")
    print traindata[:5].shape, outdata[:5].shape
    #print golddata[:5]  ; exit()
    pred = m.predict(traindata[:5], outdata[:5])
    print np.argmax(pred, axis=2) - 1
    print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
    tt.tock("predicted sample")

    tt.tick("training")
    m.train([traindata, outdata], golddata).adagrad(lr=lr).l2(wreg).grad_total_norm(gradnorm).seq_cross_entropy() \
        .validate_on([validdata, shiftdata(validgold)], validgold).validinter(validinter).seq_accuracy().seq_cross_entropy() \
        .train(numbats, epochs)
    # embed()

    tt.tock("trained").tick("predicting")
    pred = m.predict(validdata, shiftdata(validgold))
    print np.argmax(pred, axis=2) - 1
    #print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
    tt.tock("predicted sample")


if __name__ == "__main__":
    argprun(run, model="lex")
    # Fragment: simple sequence-transducer baseline; enclosing header missing.
    testgold = getdatamatrix(test, maxlen, 2).astype("int32")
    # Mask from nonzero positions of testdata (defined before this chunk).
    testmask = (testdata > 0).astype("float32")

    # Sanity check: gold evaluated against itself should score 100%.
    res = atiseval(testgold-1, testgold-1, label2idxrev); print res#; exit()

    # define model
    # Stack `depth` recurrent layers of the same width.
    innerdim = [innerdim] * depth
    m = SimpleSeqTransDec(indim=numwords, inpembdim=wordembdim, outembdim=lablembdim, innerdim=innerdim, outdim=numlabels)

    # training
    m = m.train([traindata, shiftdata(traingold), trainmask], traingold).adagrad(lr=lr).grad_total_norm(5.0).seq_cross_entropy().l2(wreg)\
        .split_validate(splits=5, random=True).seq_cross_entropy().seq_accuracy().validinter(validinter).takebest()\
        .train(numbats, epochs)

    # predict after training
    s = SeqTransDecSearch(m)
    testpred, _ = s.decode(testdata)
    # Zero out predictions at padded positions.
    testpred = testpred * testmask

    evalres = atiseval(testpred-1, testgold-1, label2idxrev); print evalres

    #testpredprobs = m.predict(testdata, shiftdata(testgold), testmask)
    #testpred = np.argmax(testpredprobs, axis=2)-1
    #testpred = testpred * testmask
    #print np.vectorize(lambda x: label2idxrev[x] if x > -1 else " ")(testpred)



if __name__ == "__main__":
    argprun(run, epochs=10)
Exemple #5
0

def runstuff(modelname, griddict, scriptname):
    """Run ``scriptname`` once for every point of a hyperparameter grid.

    Enumerates the cartesian product of ``griddict``'s value lists via a
    mixed-radix decode of a single counter, builds one command line per
    combination, and shells it out, teeing stdout/stderr into
    ``alleval/<sanitized-cmd>.out``.

    :param modelname:  value passed to the script's ``-loadmodel`` option
    :param griddict:   ordered mapping option-name -> list of values to sweep
    :param scriptname: python script to invoke for each combination
    """
    # Total number of grid points (explicit loop instead of reduce()).
    total = 1
    for values in griddict.values():
        total *= len(values)
    for i in range(total):
        # Mixed-radix decode of i: one index per grid dimension.
        # Use a scratch variable instead of clobbering the loop variable.
        indexes = OrderedDict()
        rest = i
        for k, v in griddict.items():
            indexes[k] = rest % len(v)
            rest //= len(v)
        options = "".join([
            "-{} {} ".format(x, griddict[x][indexes[x]])
            for x in griddict.keys()
        ])
        cmd = """python {}
                    -loadmodel {}
                    {}"""\
            .format(scriptname,
                    modelname,
                    options
                    )
        # Collapse the multi-line template into a single-line command.
        # Raw strings: "\s" in a non-raw literal is an invalid escape.
        cmd = re.sub(r"\n", "", cmd)
        cmd = re.sub(r"\s{2,}", " ", cmd)
        print(cmd)
        targetname = "alleval/{}.out".format(re.sub(r"\s", "_", cmd))
        # NOTE(review): cmd/targetname are interpolated into shell strings
        # unquoted -- fine for a trusted grid, unsafe for untrusted input.
        os.system("echo {} > {}".format(cmd, targetname))
        os.system("{} >> {} 2>&1".format(cmd, targetname))


if __name__ == "__main__":
    argprun(main)
Exemple #6
0
    # Fragment: char-sequence -> word-index classifier; header missing.
    ####data = np.eye(numchars, numchars)[data, :]

    block = seq2idx(invocsize=numchars, outvocsize=numwords, innerdim=statedim)
    '''gru = GRU(innerdim=statedim, dim=numchars)
    lin = Lin(indim=statedim, dim=numwords)
    lin2 = Lin(indim=numwords, dim=numwords)
    block = asblock(lambda x: Softmax()(lin(gru(x)[:, -1, :])))'''
    ###block = asblock(lambda x: Softmax()(lin2(x)))
    '''
    print testpred
    probepred = np.argmax(block.predict(testpred), axis=1)
    print probepred

    for p in block.output.allparams:
        print p
    '''
    # Train with automatic validation split; validate every 5 epochs.
    block.train([data], wordidxs).cross_entropy().adagrad(lr=lr).autovalidate().accuracy().validinter(5)\
         .train(numbats=numbats, epochs=epochs)

    #embed()
    pred = block.predict(testpred)
    print pred.shape
    print np.argmax(pred, axis=1)
    #'''




if __name__ == "__main__":
    argprun(run_seqdecatt, epochs=50)
    #print ints2words(np.asarray([[20,8,5,0,0,0], [1,2,3,0,0,0]]))
Exemple #7
0
        # Fragment: near-duplicate of the earlier "mem att" snippet; the
        # enclosing function header and opening `if` branch are missing.
        print np.argmax(pred, axis=1)-1
        print np.vectorize(lambda x: reventdic[x] if x in reventdic else None)(np.argmax(pred, axis=1)-1)
        tt.tock("predicted sample")
        tt.tick("training")
        # Train on a fixed 150-sample slice with Adagrad + grad-norm clipping.
        m.train([lexdata[1:151]], entids[1:151]).adagrad(lr=lr).cross_entropy().grad_total_norm(0.5)\
            .split_validate(5, random=True).validinter(validinter).accuracy()\
            .train(numbats, epochs)
    else:
        #embed()
        tt.tick("predicting")
        print traindata[:5].shape, outdata[:5].shape
        pred = m.predict(traindata[:5], outdata[:5])
        print np.argmax(pred, axis=2)-1
        print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2)-1)
        tt.tock("predicted sample")

        tt.tick("training")
        m.train([traindata, outdata], golddata).adagrad(lr=lr).grad_total_norm(gradnorm).seq_cross_entropy()\
            .split_validate(splits=5, random=False).validinter(validinter).seq_accuracy().seq_cross_entropy()\
            .train(numbats, epochs)
        #embed()

        tt.tock("trained").tick("predicting")
        pred = m.predict(traindata[:50], outdata[:50])
        print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2)-1)
        tt.tock("predicted sample")


if __name__ == "__main__":
    argprun(run, model="mem att")
Exemple #8
0
    # Fragment: encoder-decoder predict/train over a validation split;
    # enclosing function header missing.
    #wenc = WordEncoderPlusGlove(numchars=numchars, numwords=vocnumwords, encdim=wordencdim, embdim=wordembdim)
    tt.tock("model defined")

    # embed()
    # Decoder input = gold shifted right (teacher forcing).
    outdata = shiftdata(golddata)

    tt.tick("predicting")
    print traindata[:5].shape, outdata[:5].shape
    #print golddata[:5]  ; exit()
    pred = m.predict(traindata[:5], outdata[:5])
    print np.argmax(pred, axis=2) - 1
    print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
    tt.tock("predicted sample")

    tt.tick("training")
    m.train([traindata, outdata], golddata).adagrad(lr=lr).l2(wreg).grad_total_norm(gradnorm).seq_cross_entropy() \
        .validate_on([validdata, shiftdata(validgold)], validgold).validinter(validinter).seq_accuracy().seq_cross_entropy() \
        .train(numbats, epochs)
    # embed()

    tt.tock("trained").tick("predicting")
    pred = m.predict(validdata, shiftdata(validgold))
    print np.argmax(pred, axis=2) - 1
    #print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
    tt.tock("predicted sample")


if __name__ == "__main__":
    argprun(run, model="att lex")
Exemple #9
0
    debugarg = "subj" if subjpred else "pred" if predpred else False
    evalres = eval.eval(pred, testgold, debug=debugarg)
    for k, evalre in evalres.items():
        print("{}:\t{}".format(k, evalre))
    tt.tock("evaluated")

    # save
    basename = os.path.splitext(os.path.basename(__file__))[0]
    dirname = basename + ".results"
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    savenamegen = lambda i: "{}/{}.res".format(dirname, i)
    savename = None
    for i in xrange(100):
        savename = savenamegen(i)
        if not os.path.exists(savename):
            break
        savename = None
    if savename is None:
        raise Exception("exceeded number of saved results")
    with open(savename, "w") as f:
        f.write("{}\n".format(" ".join(sys.argv)))
        for k, evalre in evalres.items():
            f.write("{}:\t{}\n".format(k, evalre))

    #scorer.save(filepath=savename)


if __name__ == "__main__":
    argprun(run, debug=True)
    # Fragment: duplicate of the earlier "att lex" predict/train snippet;
    # enclosing function header missing.
    #wenc = WordEncoderPlusGlove(numchars=numchars, numwords=vocnumwords, encdim=wordencdim, embdim=wordembdim)
    tt.tock("model defined")

    # embed()
    # Decoder input = gold shifted right (teacher forcing).
    outdata = shiftdata(golddata)

    tt.tick("predicting")
    print traindata[:5].shape, outdata[:5].shape
    #print golddata[:5]  ; exit()
    pred = m.predict(traindata[:5], outdata[:5])
    print np.argmax(pred, axis=2) - 1
    print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
    tt.tock("predicted sample")

    tt.tick("training")
    m.train([traindata, outdata], golddata).adagrad(lr=lr).l2(wreg).grad_total_norm(gradnorm).seq_cross_entropy() \
        .validate_on([validdata, shiftdata(validgold)], validgold).validinter(validinter).seq_accuracy().seq_cross_entropy() \
        .train(numbats, epochs)
    # embed()

    tt.tock("trained").tick("predicting")
    pred = m.predict(validdata, shiftdata(validgold))
    print np.argmax(pred, axis=2) - 1
    #print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
    tt.tock("predicted sample")


if __name__ == "__main__":
    argprun(run, model="att lex")
Exemple #11
0
    # Fragment: duplicate of the earlier seq2idx snippet; header missing.
    ####data = np.eye(numchars, numchars)[data, :]

    block = seq2idx(invocsize=numchars, outvocsize=numwords, innerdim=statedim)
    '''gru = GRU(innerdim=statedim, dim=numchars)
    lin = Lin(indim=statedim, dim=numwords)
    lin2 = Lin(indim=numwords, dim=numwords)
    block = asblock(lambda x: Softmax()(lin(gru(x)[:, -1, :])))'''
    ###block = asblock(lambda x: Softmax()(lin2(x)))
    '''
    print testpred
    probepred = np.argmax(block.predict(testpred), axis=1)
    print probepred

    for p in block.output.allparams:
        print p
    '''
    # Train with automatic validation split; validate every 5 epochs.
    block.train([data], wordidxs).cross_entropy().adagrad(lr=lr).autovalidate().accuracy().validinter(5)\
         .train(numbats=numbats, epochs=epochs)

    #embed()
    pred = block.predict(testpred)
    print pred.shape
    print np.argmax(pred, axis=1)
    #'''




if __name__ == "__main__":
    argprun(run_RNNAutoEncoder)
    #print ints2words(np.asarray([[20,8,5,0,0,0], [1,2,3,0,0,0]]))
Exemple #12
0
    # Fragment: sequence transducer with greedy decoding; header missing.
    # define model
    # Stack `depth` recurrent layers of the same width.
    innerdim = [innerdim] * depth
    m = SimpleSeqTransDec(indim=numwords,
                          inpembdim=wordembdim,
                          outembdim=lablembdim,
                          innerdim=innerdim,
                          outdim=numlabels)

    # training
    m = m.train([traindata, shiftdata(traingold), trainmask], traingold).adagrad(lr=lr).grad_total_norm(5.0).seq_cross_entropy().l2(wreg)\
        .split_validate(splits=5, random=True).seq_cross_entropy().seq_accuracy().validinter(validinter).takebest()\
        .train(numbats, epochs)

    # predict after training
    s = GreedySearch(m, startsymbol=0)
    testpred, _ = s.decode(testdata)
    # Zero out predictions at padded positions.
    testpred = testpred * testmask

    evalres = atiseval(testpred - 1, testgold - 1, label2idxrev)
    print evalres

    #testpredprobs = m.predict(testdata, shiftdata(testgold), testmask)
    #testpred = np.argmax(testpredprobs, axis=2)-1
    #testpred = testpred * testmask
    #print np.vectorize(lambda x: label2idxrev[x] if x > -1 else " ")(testpred)


if __name__ == "__main__":
    argprun(run, epochs=10)
Exemple #13
0
    # Fragment: build reverse entity dict, then predict/train; header missing.
    reventdic = {}
    for k, v in entdic.items():
        reventdic[v] = k

    # embed()
    # Decoder input = gold shifted right (teacher forcing).
    outdata = shiftdata(golddata)

    tt.tick("predicting")
    print traindata[:5].shape, outdata[:5].shape
    #print golddata[:5]  ; exit()
    pred = m.predict(traindata[:5], outdata[:5])
    print np.argmax(pred, axis=2) - 1
    print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
    tt.tock("predicted sample")

    tt.tick("training")
    m.train([traindata, outdata], golddata).adagrad(lr=lr).l2(wreg).grad_total_norm(gradnorm).seq_cross_entropy() \
        .validate_on([validdata, shiftdata(validgold)], validgold).validinter(validinter).seq_accuracy().seq_cross_entropy() \
        .train(numbats, epochs)
    # embed()

    tt.tock("trained").tick("predicting")
    pred = m.predict(validdata, shiftdata(validgold))
    print np.argmax(pred, axis=2) - 1
    #print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
    tt.tock("predicted sample")


if __name__ == "__main__":
    argprun(run, model="lex")
Exemple #14
0
    # Fragment: transducer training with cross-validation; header missing.
    # Sanity check: gold evaluated against itself should score 100%.
    res = atiseval(testgold-1, testgold-1, label2idxrev); print res#; exit()

    # Debug: report in-memory size of the training matrix.
    print asizeof(traindata)

    # define model
    innerdim = [innerdim] * depth
    m = SimpleSeqTransDec(indim=numwords, inpembdim=wordembdim, outembdim=lablembdim, innerdim=innerdim, outdim=numlabels)

    # training
    m = m.train([traindata, shiftdata(traingold), trainmask], traingold).adagrad(lr=lr).grad_total_norm(5.0).seq_cross_entropy().l2(wreg)\
        .cross_validate(splits=5, random=True).seq_cross_entropy().seq_accuracy().validinter(validinter).takebest()\
        .train(numbats, epochs)

    # predict after training
    s = SeqTransDecSearch(m)
    testpred = s.decode(testdata)
    # Zero out predictions at padded positions.
    testpred = testpred * testmask

    evalres = atiseval(testpred-1, testgold-1, label2idxrev); print evalres

    #testpredprobs = m.predict(testdata, shiftdata(testgold), testmask)
    #testpred = np.argmax(testpredprobs, axis=2)-1
    #testpred = testpred * testmask
    #print np.vectorize(lambda x: label2idxrev[x] if x > -1 else " ")(testpred)




if __name__ == "__main__":
    argprun(run, epochs=0)
Exemple #15
0
    debugarg = "subj" if subjpred else "pred" if predpred else False
    evalres = eval.eval(pred, testgold, debug=debugarg)
    for k, evalre in evalres.items():
        print("{}:\t{}".format(k, evalre))
    tt.tock("evaluated")

    # save
    basename = os.path.splitext(os.path.basename(__file__))[0]
    dirname = basename + ".results"
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    savenamegen = lambda i: "{}/{}.res".format(dirname, i)
    savename = None
    for i in xrange(1000):
        savename = savenamegen(i)
        if not os.path.exists(savename):
            break
        savename = None
    if savename is None:
        raise Exception("exceeded number of saved results")
    with open(savename, "w") as f:
        f.write("{}\n".format(" ".join(sys.argv)))
        for k, evalre in evalres.items():
            f.write("{}:\t{}\n".format(k, evalre))

    #scorer.save(filepath=savename)


if __name__ == "__main__":
    argprun(run, debug=True)
Exemple #16
0
            # Fragment: memory-addressing decoder selection; the start of the
            # enclosing if/elif chain is missing from this chunk.
            memaddr = DotMemAddr
        elif memaddr == "lin":
            memaddr = LinearGateMemAddr
        # Memory-augmented decoder over the candidate memory.
        dec = MemVec2Idx(memenc, memdata, memdim=innerdim, memaddr=memaddr, memattdim=memattdim)
    else:
        # Plain feed-forward decoder baseline.
        dec = SimpleVec2Idx(indim=innerdim, outdim=numrels)

    m = Seq2Idx(enc, dec)

    m = (
        m.train([traindata], traingold)
        .adagrad(lr=lr)
        .l2(wreg)
        .grad_total_norm(1.0)
        .cross_entropy()
        .validate_on([validdata], validgold)
        .accuracy()
        .cross_entropy()
        .takebest()
        .train(numbats=numbats, epochs=epochs)
    )

    pred = m.predict(testdata)
    print pred.shape
    evalres = evaluate(np.argmax(pred, axis=1), testgold)
    print str(evalres) + "%"


if __name__ == "__main__":
    argprun(run)
Exemple #17
0
    # Fragment: attention encoder-decoder (keyword-arg style); header missing.
    # Stack `depth` recurrent layers of the same width.
    innerdim = [innerdim] * depth
    m = SimpleSeqEncDecAtt(inpvocsize=numwords,
                           inpembdim=wordembdim,
                           outvocsize=numlabels,
                           outembdim=lablembdim,
                           encdim=innerdim,
                           decdim=innerdim,
                           attdim=attdim,
                           inconcat=False)

    # training
    # Teacher forcing: gold sequence shifted right is fed as decoder input.
    m.train([traindata, shiftdata(traingold), trainmask], traingold).adagrad(lr=lr).grad_total_norm(1.).seq_cross_entropy().l2(wreg)\
        .validate_on([testdata, shiftdata(testgold), testmask], testgold).seq_cross_entropy().seq_accuracy().validinter(validinter)\
        .train(numbats, epochs)

    # predict after training
    s = SeqEncDecAttSearch(m)
    testpred = s.decode(testdata)
    # Zero out predictions at padded positions.
    testpred = testpred * testmask
    #testpredprobs = m.predict(testdata, shiftdata(testgold), testmask)
    #testpred = np.argmax(testpredprobs, axis=2)-1
    #testpred = testpred * testmask
    #print np.vectorize(lambda x: label2idxrev[x] if x > -1 else " ")(testpred)

    evalres = atiseval(testpred - 1, testgold - 1, label2idxrev)
    print evalres


if __name__ == "__main__":
    argprun(run, epochs=1)
Exemple #18
0
            # Fragment: tail of a data-matrix builder; the loop that counts `c`
            # and fills `data`/`gold` starts before this chunk.
            break
    # -1 is the padding value for unused character cells.
    datamat = np.zeros((c, maxlen)).astype("int32") - 1
    goldmat = np.zeros((c, 2)).astype("int32")
    i = 0
    for x in data:
        datamat[i, :len(x)] = x
        i += 1
    i = 0
    for x in gold:
        goldmat[i, :] = x
        i += 1
    # making chardic and transforming through chardic
    # NOTE: the tuple-unpacking lambdas below are Python-2-only syntax.
    thischardic = dict(map(lambda (x, y): (ord(x), y), chardic.items()))
    # Find the smallest id not already taken by an existing mapping.
    nextid = 0
    while nextid in thischardic.values():
        nextid += 1
    uniquechars = np.unique(datamat)
    for uniquechar in list(uniquechars):
        if not uniquechar in thischardic and uniquechar >= 0:
            thischardic[uniquechar] = nextid
            while nextid in thischardic.values():
                nextid += 1
    chardic.update(dict(map(lambda (x, y): (chr(x), y), thischardic.items())))
    print len(chardic), chardic
    # Remap raw char codes to dense ids; keep -1 padding untouched.
    datamat = np.vectorize(lambda x: thischardic[x] if x >= 0 else x)(datamat)
    return datamat, goldmat


if __name__ == "__main__":
    argprun(run)
Exemple #19
0
    # Fragment: seq2idx snippet variant with a one-hot target matrix
    # (the one-hot appears unused below); header missing.
    wordidxsonehot = np.eye(numwords, numwords)[wordidxs, :]

    ####data = np.eye(numchars, numchars)[data, :]

    block = seq2idx(invocsize=numchars, outvocsize=numwords, innerdim=statedim)
    '''gru = GRU(innerdim=statedim, dim=numchars)
    lin = Lin(indim=statedim, dim=numwords)
    lin2 = Lin(indim=numwords, dim=numwords)
    block = asblock(lambda x: Softmax()(lin(gru(x)[:, -1, :])))'''
    ###block = asblock(lambda x: Softmax()(lin2(x)))
    '''
    print testpred
    probepred = np.argmax(block.predict(testpred), axis=1)
    print probepred

    for p in block.output.allparams:
        print p
    '''
    # Train with automatic validation split; validate every 5 epochs.
    block.train([data], wordidxs).cross_entropy().adagrad(lr=lr).autovalidate().accuracy().validinter(5)\
         .train(numbats=numbats, epochs=epochs)

    #embed()
    pred = block.predict(testpred)
    print pred.shape
    print np.argmax(pred, axis=1)
    #'''


if __name__ == "__main__":
    argprun(run_seqdecatt, epochs=50)
    #print ints2words(np.asarray([[20,8,5,0,0,0], [1,2,3,0,0,0]]))