def exConMat(dbname, r, cate, iterflag, modeflag):
    fn4 = dbname + '_results_r' + str(r) + '_' + iterflag + '_' + modeflag + '.npy'
    fn5 = dbname + '_results_r' + str(r) + '_' + iterflag + '_' + modeflag + '.cpkl'
    path4 = util.getPath2(fn4)
    path5 = util.getPath2(fn5)
    cres = numpy.load(path4)
    objs = dataLoader3.cpklload(path5)
    anaT, concT = summarize(cres, cate)
    # match every summarized configuration back to its row in the results matrix
    # and collect the cached outputs and confusion matrices of that run
    cml = []
    for i in xrange(numpy.shape(concT)[0]):
        x1 = concT[i, 2]
        x2 = concT[i, 1]
        s, t = numpy.shape(cres)
        for m in xrange(s):
            y1 = cres[m, 2]   # numCenter column of the results matrix
            y2 = cres[m, 5]   # K column of the results matrix
            if y1 == x1 and y2 == x2:
                cml.append([x1, x2,
                            objs[m][0], objs[m][1], objs[m][2], objs[m][3]])
    fn6 = dbname + '_result_r' + str(r) + '_' + iterflag + '_ex' + modeflag + '.cpkl'
    path6 = util.getPath2(fn6)
    dataLoader3.cpklsave(cml, path6)

def seqLearnCM(cate, ratio, r, numCenterList, alphaList, QList, KList, lenPara,
               dbname, iter='ZZZ', mode='ZZZ'):
    # setting and starting ppservers ...
    ppservers = ()
    ncpus = 2
    job_server = pp.Server(ncpus, ppservers=ppservers)
    print str(datetime.datetime.now())[:19] + " Starting python parallel with", \
        job_server.get_ncpus(), "workers."
    paraPerm = list(itertools.product(numCenterList, alphaList, QList, KList))
    path4 = util.getPath2(dbname + "_ycobjs_r" + str(r) + ".cpkl")
    if os.path.exists(path4):
        fnResults = util.getPath2(dbname + "_results_r" + str(r) + "_" + iter + "_" + mode + ".npy")
        fnRBFobjs = util.getPath2(dbname + "_results_r" + str(r) + "_" + iter + "_" + mode + ".cpkl")
        if not os.path.exists(fnResults):
            ctrainM, ctestM, ytrainM, ytestM = dataLoader3.cpklload(path4)
            results = numpy.mat(numpy.zeros((0, lenPara)))
            objsRBF = []
            # one parallel job per parameter combination
            jobs = [job_server.submit(classifyCM,
                                      (ratio, r, para[0], para[1], para[2], para[3],
                                       lenPara, ytrainM, ytestM, ctrainM, ctestM, dbname),
                                      (),
                                      ("numpy", "dataProcess", "RBFNetwork",
                                       "dataLoader3", "datetime", "util"))
                    for para in paraPerm]
            for job in jobs:
                result, objs = job()
                results = numpy.concatenate((results, result), axis=0)
                objsRBF = objsRBF + objs
            print "\n"
            if not (numpy.shape(results)[0] == 0 or len(objsRBF) == 0):
                dataLoader3.saveMatrix([results], [fnResults])
                dataLoader3.saveObjects([objsRBF], [fnRBFobjs])
            else:
                util.infoErr([fnResults, fnRBFobjs])
        else:
            util.exPath([fnResults, fnRBFobjs])
    else:
        util.noPath([path4])

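# A minimal usage sketch for seqLearnCM. The dataset name, ratio, round index,
# parameter grids and the iter/mode tags below are illustrative assumptions,
# not values taken from the original experiments; lenPara must equal the six
# parameter columns plus however many metrics RBFNN.evaluateAll returns per run.
def _exampleSeqLearnCM():
    numCenterList = [8, 16, 32]   # assumed RBF hidden-layer sizes
    alphaList = [0.1, 0.5]        # assumed alpha values
    QList = [2, 4]                # assumed Q values
    KList = [100, 200]            # assumed bag-of-words vocabulary sizes
    lenPara = 14                  # assumed: 6 parameters + 8 evaluation metrics
    seqLearnCM(cate=6, ratio=0.8, r=0,
               numCenterList=numCenterList, alphaList=alphaList,
               QList=QList, KList=KList, lenPara=lenPara,
               dbname='kth', iter='i0', mode='CM')
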
def classifyCM(ratio, r, numCenter, alpha, Q, K, lenPara, trainY, testY,
               countLineTrain, countLineTest, dbname):
    results = numpy.mat(numpy.zeros((0, lenPara)))
    objs = []
    path5 = util.getPath2(dbname + "_bow" + "_r" + str(r) + "_k" + str(K) + ".cpkl")
    if os.path.exists(path5):
        print ("\n" + str(datetime.datetime.now())[:19]
               + " Sequence learning for {r = " + str(r)
               + ", numCenter = " + str(numCenter) + ", alpha = " + str(alpha)
               + ", Q = " + str(Q) + ", K = " + str(K) + "} ... ")
        bowTrain, bowTest, centroids = dataLoader3.cpklload(path5)
        # normalize the bag-of-words features
        trainX = dataProcess.autoNorm(bowTrain, countLineTrain)
        testX = dataProcess.autoNorm(bowTest, countLineTest)
        # define the architecture of the RBF neural network
        indim = numpy.shape(trainX)[1]
        outdim = numpy.shape(trainY)[1]
        RBFClassifier = RBFNetwork.RBFNN(indim, numCenter, outdim, alpha, Q)
        # classifier training and testing
        RBFClassifier.train(trainX, trainY)
        trainOut = RBFClassifier.test(trainX)
        testOut = RBFClassifier.test(testX)
        print (str(datetime.datetime.now())[:19]
               + " The classifier testing is done! The shapes of {outputTrain, outputTest} are: "
               + str(numpy.shape(trainOut)) + ", " + str(numpy.shape(testOut)))
        # evaluate on both sets; prepend the parameter columns to the metric row
        results1, cmtrain, cmtest = RBFClassifier.evaluateAll(trainX, trainY, trainOut,
                                                              testX, testY, testOut)
        results2 = [ratio, r, numCenter, alpha, Q, K]
        results3 = numpy.mat(results2 + results1)
        results = numpy.concatenate((results, results3), axis=0)
        objs.append([trainOut, testOut, cmtrain, cmtest])
        # this parameter combination is done
        print (str(datetime.datetime.now())[:19]
               + " Sequence learning for {r = " + str(r)
               + ", numCenter = " + str(numCenter) + ", alpha = " + str(alpha)
               + ", Q = " + str(Q) + ", K = " + str(K) + "} is done! ")
    else:
        print str(datetime.datetime.now())[:19] + " Files: " + path5 + " do not exist! "
    return (results, objs)

def mergeList(pathlist):
    result = []
    for path in pathlist:
        if os.path.exists(path):
            objs = dataLoader3.cpklload(path)
            for obj in objs:
                result.append(obj)
        else:
            util.noPath([path])
    return result

def reload(stipfile, actions, dbname):
    path1 = util.getPath(dbname + ".cpkl")
    if not os.path.exists(path1):
        print str(datetime.datetime.now())[:19] + " loading datasets ... "
        flags, inputSet, targetSet = dataLoader3.loadDataSet(stipfile, actions, path1)
    else:
        util.exPath([path1])
        flags, inputSet, targetSet = dataLoader3.cpklload(path1)
    return flags, inputSet, targetSet

def anaCMat(dbname, r, iterflag, cate):
    fn4 = dbname + '_results_r' + str(r) + '_' + iterflag + '_CM.npy'
    fn5 = dbname + '_results_r' + str(r) + '_' + iterflag + '_exCM.cpkl'
    path4 = util.getPath2(fn4)
    path5 = util.getPath2(fn5)
    cres = numpy.load(path4)
    ana3, conc3 = summarize(cres, cate)
    cmat = dataLoader3.cpklload(path5)
    # cm[5] is the test-set confusion matrix of each extracted entry
    icc = [cm[5] for cm in cmat]
    return ana3, conc3, cres, icc

def split1(r, actions, stipfile, dbname):
    path2 = util.getPath(dbname + "_origin_r" + str(r) + ".cpkl")
    if os.path.exists(path2):
        print str(datetime.datetime.now())[:19] + " File: " + path2 + " already exists! "
        splitlist, xtrain, xtest, xtrainM, xtestM, ytrainM, ytestM, xrandMX = \
            dataLoader3.cpklload(path2)
    else:
        print str(datetime.datetime.now())[:19] + " splitting datasets ..., round: " + str(r)
        flags, inputSet, targetSet = reload(stipfile, actions, dbname)
        splitlist, xtrain, xtest, xtrainM, xtestM, ytrainM, ytestM, xrandMX = \
            splitter.splitDataSet(actions, flags, inputSet, targetSet, path2)
    return splitlist, xtrain, xtest, xtrainM, xtestM, ytrainM, ytestM, xrandMX

def seqLearnTest(cate, ratio, r, numCenterList, alphaList, QList, KList, lenPara,
                 dbname, iter='ZZZ', mode='ZZZ'):
    paraPerm = list(itertools.product(numCenterList, alphaList, QList, KList))
    path4 = util.getPath2(dbname + "_ycobjs_r" + str(r) + ".cpkl")
    if os.path.exists(path4):
        fnResults = util.getPath2(dbname + "_results_r" + str(r) + "_" + iter + "_" + mode + ".npy")
        if not os.path.exists(fnResults):
            ctrainM, ctestM, ytrainM, ytestM = dataLoader3.cpklload(path4)
            results = numpy.mat(numpy.zeros((0, lenPara)))
            # serial counterpart of seqLearnCM: run one parameter combination at a time
            for para in paraPerm:
                result = classify(ratio, r, para[0], para[1], para[2], para[3],
                                  lenPara, ytrainM, ytestM, ctrainM, ctestM, dbname)
                results = numpy.concatenate((results, result), axis=0)
            print "\n"
            if not numpy.shape(results)[0] == 0:
                dataLoader3.saveMatrix([results], [fnResults])
            else:
                util.infoErr([fnResults])
        else:
            util.exPath([fnResults])
    else:
        util.noPath([path4])

def cluster1(r, KList, actions, stipfile, dbname):
    path4 = util.getPath(dbname + "_ycobjs_r" + str(r) + ".cpkl")
    # note: bitten by the frequently changing splitter / cluster API
    splitlist, xtrain, xtest, xtrainM, xtestM, ytrainM, ytestM, xrandMX = split1(
        r, actions, stipfile, dbname)
    if not os.path.exists(path4):
        print str(datetime.datetime.now())[:19] + " counting datasets ..., round: " + str(r)
        ctrain, ctest = splitter.count(xtrain, xtest)
        ycobjs = [ctrain, ctest, ytrainM, ytestM]
        dataLoader3.cpklsave(ycobjs, path4)
    else:
        util.exPath([path4])
    ctrain, ctest, ytrainM, ytestM = dataLoader3.cpklload(path4)
    # subsample the training features when the matrix is too large for k-means
    if numpy.shape(xtrainM)[0] > 100000:
        idx = splitter.idxrandom(numpy.shape(xtrainM)[0], 100000)
        xrandMX2 = xtrainM[idx, :]
    else:
        xrandMX2 = xtrainM
    print str(datetime.datetime.now())[:19] + " The shape of xrandMX2 is: " + \
        str(numpy.shape(xrandMX2))
    # build one bag-of-words representation per vocabulary size k
    for k in KList:
        path3 = util.getPath(dbname + "_bow" + "_r" + str(r) + "_k" + str(k) + ".cpkl")
        if not os.path.exists(path3):
            cluster.kMeans(r, k, ctrain, ctest, xtrainM, xtestM, xrandMX2, path3)
        else:
            util.exPath([path3])

def evaluate(dbname, r, iterflag, modeflag):
    fn6 = dbname + '_result_r' + str(r) + '_' + iterflag + '_ex' + modeflag + '.cpkl'
    fn4 = dbname + "_ycobjs_r" + str(r) + ".cpkl"
    path4 = util.getPath2(fn4)
    path6 = util.getPath2(fn6)
    ycobj = dataLoader3.cpklload(path4)
    outcm = dataLoader3.cpklload(path6)
    ctrain, ctest, ytrain, ytest = ycobj[0], ycobj[1], ycobj[2], ycobj[3]
    mtclistMlist = []
    for oc in outcm:
        m, k, outtrain, outtest, cmtrain, cmtest = oc[0], oc[1], oc[2], oc[3], oc[4], oc[5]
        outtestI = dataProcess.ftoi(outtest)   # designed for kth
        ytestM = numpy.mat(ytest)
        tar = transLabelML(ytestM)
        out = transLabelML(outtestI)
        cm = ConMat(out, tar)   # note the argument order: (out, tar), not (tar, out)
        print cm
        mtclist = []
        for i in xrange(numpy.shape(cm)[1]):
            # one-vs-rest binarization of class i and of its complement
            t, it = multi2bin(tar, i)
            o, io = multi2bin(out, i)
            auc = AUC(t, o)
            print auc
            # assumption: TNR == iTPR and TPR == iTNR
            print m, k, i
            pre, rec, thre = PRC(t, o)
            ipre, irec, ithre = PRC(it, io)
            tnr, itnr = rec, irec
            tpr, itpr = irec, rec
            gmean = numpy.sqrt(tpr[-2] * tnr[-2])
            print pre, rec, thre
            print ipre, irec, ithre
            print gmean
            f1s = F1Score(t, o)
            print f1s
            mtc = [auc, pre[-2], rec[-2], tpr[-2], tnr[-2], gmean, f1s[-1]]
            mtclist.append(mtc)
        mtclistM = numpy.array(mtclist)
        mtclistMlist.append(mtclistM)
    return mtclistMlist

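# A sketch of the presumed end-to-end order of the helpers above, inferred from
# the cache files each step reads and writes (*_ycobjs_*, *_bow_*, *_results_*,
# *_ex<mode>*). The concrete arguments are placeholder assumptions, not the
# original experimental setup.
def _examplePipeline(actions, stipfile):
    dbname, cate, ratio, r = 'kth', 6, 0.8, 0            # assumed experiment setup
    KList = [100, 200]                                   # assumed vocabulary sizes
    cluster1(r, KList, actions, stipfile, dbname)        # split + count + k-means bag-of-words
    seqLearnCM(cate, ratio, r, [8, 16], [0.1], [2], KList,
               14, dbname, iter='i0', mode='CM')         # RBF grid search -> *_results_*
    exConMat(dbname, r, cate, 'i0', 'CM')                # keep outputs of summarized runs -> *_exCM.cpkl
    return evaluate(dbname, r, 'i0', 'CM')               # per-class AUC / precision / recall / G-mean / F1
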
# standalone export script: dump the cached origin splits to plain-text files
import numpy as np
import dataLoader3

dbname = 'ucf'
b_round = 9
e_round = 12

for sround in range(b_round, e_round):
    path2 = '../data/pwd/%s_origin_r%d.cpkl' % (dbname, sround)
    print path2
    splitlist, ctrain, ctest, ctrainM, ctestM, xtrainM, xtestM, ytrainM, ytestM, xrandMX = \
        dataLoader3.cpklload(path2)
    np.savetxt('../data/pwd/%s_ctrain_r%d.txt' % (dbname, sround), ctrain)
    np.savetxt('../data/pwd/%s_ctest_r%d.txt' % (dbname, sround), ctest)
    np.savetxt('../data/pwd/%s_ytrain_r%d.txt' % (dbname, sround), ytrainM)
    np.savetxt('../data/pwd/%s_ytest_r%d.txt' % (dbname, sround), ytestM)
    np.savetxt('../data/pwd/%s_xtrain_r%d.txt' % (dbname, sround), xtrainM)
    np.savetxt('../data/pwd/%s_xtest_r%d.txt' % (dbname, sround), xtestM)
    np.savetxt('../data/pwd/%s_xrand_r%d.txt' % (dbname, sround), xrandMX)