Esempio n. 1
0
def exConMat(dbname, r, cate, iterflag, modeflag):
    """Export the stored outputs that belong to the rows chosen by summarize.

    Loads the result matrix (``.npy``) and the parallel list of per-run
    objects (``.cpkl``) for round ``r``, calls ``summarize`` on the matrix,
    and for every row of its second return value collects the objects of all
    result rows whose columns 2 and 5 match. The collected records are saved
    to a ``.cpkl`` file.

    Args:
        dbname: Dataset name used as the file-name prefix.
        r: Round identifier embedded in the file names.
        cate: Passed through unchanged to ``summarize``.
        iterflag: Iteration tag embedded in the file names.
        modeflag: Mode tag embedded in the file names.

    Returns:
        None. Each saved record is
        ``[concT[i, 2], concT[i, 1], objs[m][0], ..., objs[m][3]]``.
    """
    fn4 = dbname + '_results_r' + str(
        r) + '_' + iterflag + '_' + modeflag + '.npy'
    fn5 = dbname + '_results_r' + str(
        r) + '_' + iterflag + '_' + modeflag + '.cpkl'

    path4 = util.getPath2(fn4)
    path5 = util.getPath2(fn5)

    cres = numpy.load(path4)
    # objs[m] is indexed with the same m as the rows of cres below.
    objs = dataLoader3.cpklload(path5)

    # Only the second value returned by summarize is needed here.
    _, concT = summarize(cres, cate)

    # The row count does not change while scanning, so read it once.
    numRows = numpy.shape(cres)[0]

    cml = []
    for i in xrange(numpy.shape(concT)[0]):
        # Keys of the selected row: concT column 2 is compared with cres
        # column 2, concT column 1 with cres column 5.
        # NOTE(review): presumably numCenter and K -- confirm against the
        # column layout produced by summarize.
        key2 = concT[i, 2]
        key1 = concT[i, 1]

        for m in xrange(numRows):
            if cres[m, 2] == key2 and cres[m, 5] == key1:
                cml.append([
                    key2, key1, objs[m][0], objs[m][1], objs[m][2],
                    objs[m][3]
                ])

    # NOTE(review): the output name uses '_result_r' (singular) while the
    # inputs use '_results_r'; kept as is because evaluate() reads this
    # exact name.
    fn6 = dbname + '_result_r' + str(
        r) + '_' + iterflag + '_ex' + modeflag + '.cpkl'
    path6 = util.getPath2(fn6)
    dataLoader3.cpklsave(cml, path6)
Esempio n. 2
0
def seqLearnCM(cate,
               ratio,
               r,
               numCenterList,
               alphaList,
               QList,
               KList,
               lenPara,
               dbname,
               iter='ZZZ',
               mode='ZZZ'):
    """Run classifyCM over a parameter grid with Parallel Python.

    One job is submitted per combination of
    ``numCenterList x alphaList x QList x KList``. The result rows returned
    by the jobs are stacked into one matrix and the returned object lists are
    concatenated; both are then saved. Nothing is computed when the
    ``_ycobjs`` input file is missing or when the result file already exists.

    Args:
        cate: Not used by this function.
        ratio: Forwarded to ``classifyCM``.
        r: Round identifier; embedded in file names and forwarded.
        numCenterList: Candidate values for the ``numCenter`` parameter.
        alphaList: Candidate values for the ``alpha`` parameter.
        QList: Candidate values for the ``Q`` parameter.
        KList: Candidate values for the ``K`` parameter.
        lenPara: Number of columns of the result matrix.
        dbname: Dataset name used as the file-name prefix.
        iter: Iteration tag embedded in the output file names.
        mode: Mode tag embedded in the output file names.

    Returns:
        None. Results are written with ``dataLoader3.saveMatrix`` and
        ``dataLoader3.saveObjects``.
    """
    # setting and starting ppservers ...
    ppservers = ()
    ncpus = 2
    job_server = pp.Server(ncpus, ppservers=ppservers)
    try:
        print("%s Starting python parallel with %s workers. " %
              (str(datetime.datetime.now())[:19], job_server.get_ncpus()))

        paraPerm = list(
            itertools.product(numCenterList, alphaList, QList, KList))

        path4 = util.getPath2(dbname + "_ycobjs_r" + str(r) + ".cpkl")
        if not os.path.exists(path4):
            util.noPath([path4])
            return

        fnResults = util.getPath2(dbname + "_results_r" + str(r) + "_" + iter +
                                  "_" + mode + ".npy")
        fnRBFobjs = util.getPath2(dbname + "_results_r" + str(r) + "_" + iter +
                                  "_" + mode + ".cpkl")
        if os.path.exists(fnResults):
            util.exPath([fnResults, fnRBFobjs])
            return

        ctrainM, ctestM, ytrainM, ytestM = dataLoader3.cpklload(path4)

        # Modules each worker imports before running classifyCM. "os" is
        # listed because classifyCM calls os.path.exists.
        # NOTE(review): the worker may already have os in scope; the extra
        # entry is harmless either way.
        workerModules = ("numpy", "dataProcess", "RBFNetwork", "dataLoader3",
                         "datetime", "util", "os")
        jobs = [
            job_server.submit(
                classifyCM,
                (ratio, r, para[0], para[1], para[2], para[3], lenPara,
                 ytrainM, ytestM, ctrainM, ctestM, dbname), (), workerModules)
            for para in paraPerm
        ]

        results = numpy.mat(numpy.zeros((0, lenPara)))
        objsRBF = []
        for job in jobs:
            # Calling the job object waits for it and returns classifyCM's
            # (results, objs) pair.
            cnm = job()
            result, objs = cnm[0], cnm[1]
            results = numpy.concatenate((results, result), axis=0)
            objsRBF.extend(objs)

        print("\n")
        if numpy.shape(results)[0] == 0 or len(objsRBF) == 0:
            util.infoErr([fnResults, fnRBFobjs])
        else:
            dataLoader3.saveMatrix([results], [fnResults])
            dataLoader3.saveObjects([objsRBF], [fnRBFobjs])
    finally:
        # Shut the worker processes down on every exit path; otherwise each
        # call leaves its workers running.
        job_server.destroy()
Esempio n. 3
0
def classifyCM(ratio, r, numCenter, alpha, Q, K, lenPara, trainY, testY,
               countLineTrain, countLineTest, dbname):
    """Train and evaluate one RBF network for a single parameter setting.

    Loads the bag-of-words file for round ``r`` and ``K``, normalizes it,
    trains ``RBFNetwork.RBFNN`` and evaluates it on the train and test sets.

    Args:
        ratio, r, numCenter, alpha, Q, K: Parameter values; they form the
            first six columns of the returned result row.
        lenPara: Number of columns of the result matrix.
        trainY, testY: Target matrices for training and testing.
        countLineTrain, countLineTest: Second argument given to
            ``dataProcess.autoNorm`` for the train and test data.
        dbname: Dataset name used as the file-name prefix.

    Returns:
        ``(results, objs)``: ``results`` is a matrix with ``lenPara`` columns
        holding one row, and ``objs`` a one-element list holding
        ``[trainOut, testOut, cmtrain, cmtest]``. When the bag-of-words file
        does not exist, ``results`` has zero rows and ``objs`` is empty.
    """
    results = numpy.mat(numpy.zeros((0, lenPara)))
    objs = []

    bowPath = util.getPath2(dbname + "_bow" + "_r" + str(r) + "_k" + str(K) +
                            ".cpkl")

    # Nothing to learn from: report the missing file and return empties.
    if not os.path.exists(bowPath):
        print(str(datetime.datetime.now())[:19] + " Files: " + bowPath +
              " do not exist! ")
        return (results, objs)

    print("\n" + str(datetime.datetime.now())[:19] +
          " Sequence learning for {r = " + str(r) + ", numCenter = " +
          str(numCenter) + ", alpha = " + str(alpha) + ", Q = " + str(Q) +
          ", K = " + str(K) + "} ... ")
    bowTrain, bowTest, centroids = dataLoader3.cpklload(bowPath)

    # normalize dataset
    trainX = dataProcess.autoNorm(bowTrain, countLineTrain)
    testX = dataProcess.autoNorm(bowTest, countLineTest)

    # network architecture: input/output widths come from the data
    inputWidth = numpy.shape(trainX)[1]
    outputWidth = numpy.shape(trainY)[1]
    network = RBFNetwork.RBFNN(inputWidth, numCenter, outputWidth, alpha, Q)

    # classifier training and testing
    network.train(trainX, trainY)
    trainOut = network.test(trainX)
    testOut = network.test(testX)
    print(str(datetime.datetime.now())[:19] +
          " The classifier testing is done! the shapes of {outputTrain, outputTest} are: "
          + str(numpy.shape(trainOut)) + ", " + str(numpy.shape(testOut)))

    # NOTE(review): the original author marked the section below with
    # "bug bug bug" without saying what is wrong.
    scores, cmtrain, cmtest = network.evaluateAll(trainX, trainY, trainOut,
                                                  testX, testY, testOut)
    # Result row = the six parameters followed by the evaluation scores.
    row = numpy.mat([ratio, r, numCenter, alpha, Q, K] + scores)
    results = numpy.concatenate((results, row), axis=0)
    objs.append([trainOut, testOut, cmtrain, cmtest])

    # The epoch is done!
    print(str(datetime.datetime.now())[:19] +
          " Sequence learning for {r = " + str(r) + ", numCenter = " +
          str(numCenter) + ", alpha = " + str(alpha) + ", Q = " + str(Q) +
          ", K = " + str(K) + "} is done! ")

    return (results, objs)
Esempio n. 4
0
def mergeList(pathlist):
    """Concatenate the object lists stored in several pickle files.

    Args:
        pathlist: Iterable of file paths. Each existing file is loaded with
            ``dataLoader3.cpklload`` and must yield an iterable of objects.

    Returns:
        A new list with the objects of all existing files, in path order.
        Missing paths are reported through ``util.noPath`` and skipped.
    """
    result = []
    for path in pathlist:
        if os.path.exists(path):
            result.extend(dataLoader3.cpklload(path))
        else:
            # The other util.noPath call sites pass a list of paths, so
            # wrap the single path instead of handing over a bare string.
            util.noPath([path])
    return result
Esempio n. 5
0
def reload(stipfile, actions, dbname):
    """Return the dataset, from its pickle file when that file exists.

    Args:
        stipfile: Forwarded to ``dataLoader3.loadDataSet``.
        actions: Forwarded to ``dataLoader3.loadDataSet``.
        dbname: Dataset name; the pickle file is ``<dbname>.cpkl``.

    Returns:
        The tuple ``(flags, inputSet, targetSet)``.
    """
    cachePath = util.getPath(dbname + ".cpkl")

    if os.path.exists(cachePath):
        util.exPath([cachePath])
        loaded = dataLoader3.cpklload(cachePath)
    else:
        print(str(datetime.datetime.now())[:19] + " loading datasets ... ")
        # loadDataSet also receives the pickle path.
        # NOTE(review): presumably it writes the file there -- confirm.
        loaded = dataLoader3.loadDataSet(stipfile, actions, cachePath)

    flags, inputSet, targetSet = loaded
    return flags, inputSet, targetSet
Esempio n. 6
0
def anaCMat(dbname, r, iterflag, cate):
    """Load the CM results of round ``r`` together with their summary.

    Args:
        dbname: Dataset name used as the file-name prefix.
        r: Round identifier embedded in the file names.
        iterflag: Iteration tag embedded in the file names.
        cate: Passed through unchanged to ``summarize``.

    Returns:
        ``(ana3, conc3, cres, icc)``: the two values returned by
        ``summarize``, the loaded result matrix, and a list holding element 5
        of every record in the ``_exCM.cpkl`` file.
    """
    prefix = dbname + '_results_r' + str(r) + '_' + iterflag
    path4 = util.getPath2(prefix + '_CM.npy')
    path5 = util.getPath2(prefix + '_exCM.cpkl')

    cres = numpy.load(path4)
    ana3, conc3 = summarize(cres, cate)

    # Keep only element 5 of each stored record.
    icc = [record[5] for record in dataLoader3.cpklload(path5)]

    return ana3, conc3, cres, icc
Esempio n. 7
0
def split1(r, actions, stipfile, dbname):
    """Return the train/test split of round ``r``, creating it if needed.

    When the ``_origin_r<r>.cpkl`` file exists it is loaded; otherwise the
    dataset is obtained with ``reload`` and split with
    ``splitter.splitDataSet``, which also receives the file path.

    Args:
        r: Round identifier embedded in the file name.
        actions: Forwarded to ``reload`` and ``splitter.splitDataSet``.
        stipfile: Forwarded to ``reload``.
        dbname: Dataset name used as the file-name prefix.

    Returns:
        The 8-tuple ``(splitlist, xtrain, xtest, xtrainM, xtestM, ytrainM,
        ytestM, xrandMX)``.
    """
    splitPath = util.getPath(dbname + "_origin_r" + str(r) + ".cpkl")

    if not os.path.exists(splitPath):
        print(str(datetime.datetime.now())[:19] +
              " splitting datasets ..., round: " + str(r))
        flags, inputSet, targetSet = reload(stipfile, actions, dbname)
        parts = splitter.splitDataSet(actions, flags, inputSet, targetSet,
                                      splitPath)
    else:
        print(str(datetime.datetime.now())[:19] + " Files: " + splitPath +
              " are already exist! ")
        parts = dataLoader3.cpklload(splitPath)

    # Unpacking enforces that exactly eight items came back.
    (splitlist, xtrain, xtest, xtrainM, xtestM, ytrainM, ytestM,
     xrandMX) = parts
    return splitlist, xtrain, xtest, xtrainM, xtestM, ytrainM, ytestM, xrandMX
Esempio n. 8
0
def seqLearnTest(cate,
                 ratio,
                 r,
                 numCenterList,
                 alphaList,
                 QList,
                 KList,
                 lenPara,
                 dbname,
                 iter='ZZZ',
                 mode='ZZZ'):
    """Run ``classify`` sequentially over a parameter grid and save the rows.

    ``classify`` is called once per combination of
    ``numCenterList x alphaList x QList x KList`` and the returned rows are
    stacked into one matrix. Nothing is computed when the ``_ycobjs`` input
    file is missing or when the result file already exists.

    Args:
        cate: Not used by this function.
        ratio: Forwarded to ``classify``.
        r: Round identifier; embedded in file names and forwarded.
        numCenterList, alphaList, QList, KList: Candidate parameter values.
        lenPara: Number of columns of the result matrix.
        dbname: Dataset name used as the file-name prefix.
        iter: Iteration tag embedded in the output file name.
        mode: Mode tag embedded in the output file name.

    Returns:
        None. The matrix is written with ``dataLoader3.saveMatrix``.
    """
    grid = list(itertools.product(numCenterList, alphaList, QList, KList))

    inputPath = util.getPath2(dbname + "_ycobjs_r" + str(r) + ".cpkl")
    if not os.path.exists(inputPath):
        util.noPath([inputPath])
        return

    fnResults = util.getPath2(dbname + "_results_r" + str(r) + "_" + iter +
                              "_" + mode + ".npy")
    if os.path.exists(fnResults):
        util.exPath([fnResults])
        return

    ctrainM, ctestM, ytrainM, ytestM = dataLoader3.cpklload(inputPath)

    # Start from a zero-row matrix and append one result block per setting.
    results = numpy.mat(numpy.zeros((0, lenPara)))
    for numCenter, alpha, Q, K in grid:
        block = classify(ratio, r, numCenter, alpha, Q, K, lenPara, ytrainM,
                         ytestM, ctrainM, ctestM, dbname)
        results = numpy.concatenate((results, block), axis=0)

    print("\n")
    if numpy.shape(results)[0] == 0:
        util.infoErr([fnResults])
    else:
        dataLoader3.saveMatrix([results], [fnResults])
Esempio n. 9
0
def cluster1(r, KList, actions, stipfile, dbname):
    """Create the count file and the k-means bag-of-words files for round r.

    Args:
        r: Round identifier embedded in the file names.
        KList: Values of ``k``; one ``_bow_r<r>_k<k>.cpkl`` file per value.
        actions: Forwarded to ``split1``.
        stipfile: Forwarded to ``split1``.
        dbname: Dataset name used as the file-name prefix.

    Returns:
        None. Missing files are produced through ``dataLoader3.cpklsave``
        and ``cluster.kMeans``; existing ones are reported via
        ``util.exPath``.
    """
    countPath = util.getPath(dbname + "_ycobjs_r" + str(r) + ".cpkl")

    # The original author noted that this API changes frequently.
    splitlist, xtrain, xtest, xtrainM, xtestM, ytrainM, ytestM, xrandMX = split1(
        r, actions, stipfile, dbname)

    if os.path.exists(countPath):
        util.exPath([countPath])
    else:
        print(str(datetime.datetime.now())[:19] +
              " counting datasets ..., round: " + str(r))
        ctrain, ctest = splitter.count(xtrain, xtest)
        dataLoader3.cpklsave([ctrain, ctest, ytrainM, ytestM], countPath)

    # Always read the counts back from disk, whether or not just written.
    ctrain, ctest, ytrainM, ytestM = dataLoader3.cpklload(countPath)

    # Cap the rows handed to k-means as xrandMX2 at 100000.
    rowCount = numpy.shape(xtrainM)[0]
    if rowCount > 100000:
        picked = splitter.idxrandom(rowCount, 100000)
        xrandMX2 = xtrainM[picked, :]
    else:
        xrandMX2 = xtrainM

    print(str(datetime.datetime.now())[:19] +
          " The shape of xrandMX2 is : " + str(numpy.shape(xrandMX2)))

    for k in KList:
        bowPath = util.getPath(dbname + "_bow" + "_r" + str(r) + "_k" +
                               str(k) + ".cpkl")

        if os.path.exists(bowPath):
            util.exPath([bowPath])
        else:
            cluster.kMeans(r, k, ctrain, ctest, xtrainM, xtestM, xrandMX2,
                           bowPath)
Esempio n. 10
0
def evaluate(dbname, r, iterflag, modeflag):
    """Compute per-class metrics for every stored classifier output.

    Loads the targets of round ``r`` and the records of the
    ``_ex<modeflag>.cpkl`` file. Each record is indexed as
    ``(m, k, outtrain, outtest, cmtrain, cmtest)``; only ``m``, ``k`` and
    ``outtest`` are used. Intermediate values are printed along the way.

    Args:
        dbname: Dataset name used as the file-name prefix.
        r: Round identifier embedded in the file names.
        iterflag: Iteration tag embedded in the record file name.
        modeflag: Mode tag embedded in the record file name.

    Returns:
        A list with one ``numpy`` array per record. Each array has one row
        per column of the confusion matrix, holding
        ``[auc, pre[-2], rec[-2], tpr[-2], tnr[-2], gmean, f1s[-1]]``.
    """
    recordName = dbname + '_result_r' + str(
        r) + '_' + iterflag + '_ex' + modeflag + '.cpkl'
    targetName = dbname + "_ycobjs_r" + str(r) + ".cpkl"

    targetPath = util.getPath2(targetName)
    recordPath = util.getPath2(recordName)

    ycobj = dataLoader3.cpklload(targetPath)
    outcm = dataLoader3.cpklload(recordPath)

    # Only the test targets (item 3) are used below.
    _ctrain, _ctest, _ytrain, ytest = ycobj[0], ycobj[1], ycobj[2], ycobj[3]

    mtclistMlist = []
    for oc in outcm:
        m, k, _outtrain, outtest, _cmtrain, _cmtest = oc[0], oc[1], oc[2], oc[
            3], oc[4], oc[5]
        outtestI = dataProcess.ftoi(outtest)
        # designed for kth
        ytestM = numpy.mat(ytest)

        tar = transLabelML(ytestM)
        out = transLabelML(outtestI)

        cm = ConMat(out, tar)  # not (tar, out)
        print(cm)

        perClass = []
        for i in xrange(numpy.shape(cm)[1]):
            t, it = multi2bin(tar, i)
            o, io = multi2bin(out, i)

            auc = AUC(t, o)
            print(auc)

            # NOTE(review): the original author marked this section "bugs".
            # It assumes TNR == iTPR and TPR == iTNR, so the recall of the
            # (it, io) pair stands in for tpr and the recall of (t, o) for
            # tnr.
            print("%s %s %s" % (m, k, i))
            pre, rec, thre = PRC(t, o)
            ipre, irec, ithre = PRC(it, io)
            tnr = rec
            tpr = irec
            gmean = numpy.sqrt(tpr[-2] * tnr[-2])
            print("%s %s %s" % (pre, rec, thre))
            print("%s %s %s" % (ipre, irec, ithre))
            print(gmean)

            f1s = F1Score(t, o)
            print(f1s)

            perClass.append(
                [auc, pre[-2], rec[-2], tpr[-2], tnr[-2], gmean, f1s[-1]])

        mtclistMlist.append(numpy.array(perClass))

    return mtclistMlist
Esempio n. 11
0
import numpy as np

import dataLoader3

# Dataset prefix and the half-open range of rounds to export.
dbname = 'ucf'
b_round = 9
e_round = 12

# Dump the arrays of each round's pickle file as plain-text files.
for sround in range(b_round, e_round):
    path2 = '../data/pwd/%s_origin_r%d.cpkl' % (dbname, sround)
    print(path2)
    (splitlist, ctrain, ctest, ctrainM, ctestM, xtrainM, xtestM, ytrainM,
     ytestM, xrandMX) = dataLoader3.cpklload(path2)

    # (file-name pattern, array) pairs, written in this order.
    exports = [
        ('../data/pwd/%s_ctrain_r%d.txt', ctrain),
        ('../data/pwd/%s_ctest_r%d.txt', ctest),
        ('../data/pwd/%s_ytrain_r%d.txt', ytrainM),
        ('../data/pwd/%s_ytest_r%d.txt', ytestM),
        ('../data/pwd/%s_xtrain_r%d.txt', xtrainM),
        ('../data/pwd/%s_xtest_r%d.txt', xtestM),
        ('../data/pwd/%s_xrand_r%d.txt', xrandMX),
    ]
    for pattern, data in exports:
        np.savetxt(pattern % (dbname, sround), data)