def reChooseDict(instNo, currDict, currMean, weightWindowDsCurDict, currStdCompare, currMeanCompare, dicts, testDWindow, numAttrs, numInsts, outputCompare, Lambda):
    '''Score every dictionary on the recent window and return the best one.

    Every dictionary except the current one is scored by running
    ``spams.lasso`` on the column-normalised window.  For each instance the
    largest coefficient strictly below ``Lambda + 0.02`` is recorded; a value
    of exactly 0 or anything above 1 is stored as 1.  The dictionary with the
    highest mean score is selected.

    The function also relies on the module-level names ``aL``, ``spams``,
    ``np``, ``deque`` and ``outputPredictSparse``.

    Args:
        instNo: Instance number used to label the log lines; the first
            instance of the window is logged as ``instNo - len(testDWindow)``.
        currDict: Index of the dictionary currently in use.
        currMean: Mean score of the current dictionary (used as-is).
        weightWindowDsCurDict: Score window of the current dictionary; reused
            without being recomputed.
        currStdCompare: Not used by this variant.
        currMeanCompare: Not used by this variant.
        dicts: Sequence of dictionaries handed to ``spams.lasso``.
        testDWindow: Window of test data passed to ``fortranArrayPara``.
        numAttrs: Forwarded to ``fortranArrayPara``.
        numInsts: Forwarded to ``fortranArrayPara``.
        outputCompare: Writable object receiving the comparison log.
        Lambda: Threshold base; weights >= ``Lambda + 0.02`` are ignored.

    Returns:
        Tuple ``(chosen index, its mean score, std of its window, its window)``.

    Raises:
        ValueError: If no weight of an instance passes the threshold filter
            (``max`` of an empty list).
    '''
    outputCompare.write('Re-Choose Dictionary!\n')
    outputPredictSparse.write('Re-Choose Dictionary!\n')

    numDicts = len(dicts)
    weightWindowDsRe = [deque() for _ in range(numDicts)]
    dictWeightMeanRe = {}

    # Build the window matrix and scale every column to unit L2 norm.
    testWindow = aL.arffLoader()
    testD = testWindow.fortranArrayPara(testDWindow, numAttrs, numInsts)
    testD = np.asfortranarray(testD / np.tile(np.sqrt((testD * testD).sum(axis=0)), (testD.shape[0], 1)))

    firstInstNo = instNo - len(testDWindow)
    weightLimit = Lambda + 0.02

    for dictNo in range(numDicts):
        if dictNo == currDict:
            # The current dictionary keeps the window supplied by the caller.
            weightWindowDsRe[dictNo] = weightWindowDsCurDict
            continue

        alpha_lasso_m1_Ds_batch = spams.lasso(testD, dicts[dictNo], return_reg_path=False, lambda1=1, pos=True, mode=0)
        for j in range(alpha_lasso_m1_Ds_batch.shape[1]):
            sparseText = str(alpha_lasso_m1_Ds_batch.getcol(j))
            outputCompare.write(str(firstInstNo + j) + '-D' + str(dictNo) + ':' + sparseText + '\n\n')

            # Each line of the sparse text is expected to look like
            # "(row, col)<whitespace>value"; only the value is needed here.
            weightTmp = []
            for line in sparseText.split('\n'):
                line = line.strip()
                weight = float(line.split(',')[1].split(')')[1].strip())
                if weight < weightLimit:
                    weightTmp.append(weight)

            maxWeight = max(weightTmp)
            # A weight of exactly 0.0 is treated as 1 (the original
            # "one page bug" workaround); scores are also capped at 1.
            if maxWeight == 0 or maxWeight > 1:
                maxWeight = 1
            weightWindowDsRe[dictNo].append(maxWeight)

    # Iterate over the dictionaries actually scored above (len(dicts)) rather
    # than a module-level count that may disagree with it.
    for dictNo in range(numDicts):
        if dictNo == currDict:
            dictWeightMeanRe[dictNo] = currMean
        else:
            dictWeightMeanRe[dictNo] = np.mean(weightWindowDsRe[dictNo])

    # Dictionary with the highest mean score, together with that mean.
    maxWeightDict, meanCompareRe = max(dictWeightMeanRe.items(), key=lambda x: x[1])
    stdCompareRe = np.std(weightWindowDsRe[maxWeightDict])

    if maxWeightDict == currDict:
        message = 'Keep same model' + str(maxWeightDict) + '!\n'
    else:
        message = 'Change model to model-' + str(maxWeightDict) + ',meanCompare:' + str(meanCompareRe) + ',stdCompare:' + str(stdCompareRe) + '!\n'
    outputCompare.write(message)
    outputPredictSparse.write(message)

    return (maxWeightDict, meanCompareRe, stdCompareRe, weightWindowDsRe[maxWeightDict])
#numOfDicts = len(sys.argv)-5 #algowindow numOfDicts = (len(sys.argv)-8) / 2 #Load dictionaries input_files = [] dicts = [] dictLoaders = [] Ds = [] for dictNo in range(numOfDicts): # input_files.append(open(sys.argv[i+2])) input_file = open(sys.argv[dictNo+2]) # dicts.append(input_files[i].read()) # dicts.append(input_file.read()) dict = input_file.read() dictLoaders.append(aL.arffLoader()) # dictLoaders[i].load(dicts[i]) dictLoaders[dictNo].load(dict) Ds.append(dictLoaders[dictNo].fortranArray(dictLoaders[dictNo].transactionContentList)) # print 'D'+str(dictNo+1)+':' # print Ds[dictNo] Ds[dictNo] = np.asfortranarray(Ds[dictNo] / np.tile(np.sqrt((Ds[dictNo]*Ds[dictNo]).sum(axis=0)),(Ds[dictNo].shape[0],1))) # print np.tile(np.sqrt((Ds[i]*Ds[i]).sum(axis=0) # print 'normalize of D' + str(dictNo+1) + ':' # print Ds[dictNo] algoResults = [] #Load result of other algo for i in range(numOfDicts): input_file = open(sys.argv[i+numOfDicts+2])
return (maxWeightDict,meanCompareRe,stdCompareRe,weightWindowDsRe[maxWeightDict]) Alltic = time.time() #numOfDicts = len(sys.argv)-5 #algowindow numOfDicts = (len(sys.argv)-12) / 2 #Load dictionaries input_files = [] dicts = [] dictLoaders = [] Ds = [] for i in range(numOfDicts): input_files.append(open(sys.argv[i+2])) dicts.append(input_files[i].read()) dictLoaders.append(aL.arffLoader()) dictLoaders[i].load(dicts[i]) Ds.append(dictLoaders[i].fortranArray(dictLoaders[i].transactionContentList)) # print 'D'+str(i+1)+':' # print Ds[i] Ds[i] = np.asfortranarray(Ds[i] / np.tile(np.sqrt((Ds[i]*Ds[i]).sum(axis=0)),(Ds[i].shape[0],1))) # print np.tile(np.sqrt((Ds[i]*Ds[i]).sum(axis=0) # print 'normalize of D' + str(i+1) + ':' # print Ds[i] algoResults = [] #Load result of other algo for i in range(numOfDicts): input_file = open(sys.argv[i+numOfDicts+2]) result = input_file.read() algoResults.append(aL.arffLoader())
'''
Load the dictionary file named by the first command-line argument through
arffLoader and print diagnostic information about it.

Created on 2014/3/18

@author: bhchen
'''
import arffLoader as aL
import sys
import spams
import time
import numpy as np

# Read the whole file up front; the handle is closed as soon as it is read.
with open(sys.argv[1]) as input_f1:
    dict1 = input_f1.read()

dictLoader1 = aL.arffLoader()
dictLoader1.load(dict1)

# Single-argument print(...) behaves identically under Python 2 and 3.
print(dictLoader1.classIndex)
print(dictLoader1.attrName)
print(dictLoader1.transactionContentList[0])
print(dictLoader1.numInstance)

D1 = dictLoader1.fortranArray(dictLoader1.transactionContentList)
print('D1:')
print(D1)
print(D1.shape[0])

# Sum of squares along axis 0 of D1.
tmp = (D1 * D1).sum(axis=0)
print('tmp:')
print(tmp)
print(tmp.shape[0])

# Square root of the sums above (reuses tmp instead of recomputing it).
notmp = np.sqrt(tmp)
print('notmp:')
@author: bhchen 待處理Bug: 1.第17.18筆會回傳錯誤訊息, weight為0但是似乎會選中正確的instance ''' import arffLoader as aL import sys import spams import time import numpy as np import math #test signal input_X = open(sys.argv[1]) testSignal = input_X.read() testLoader = aL.arffLoader() testLoader.load(testSignal) X = testLoader.fortranArray(testLoader.transactionContentList) print 'X:' print X X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)), (X.shape[0], 1))) print 'normalize of X:' print X numOfDicts = len(sys.argv) - 5 #Load dictionaries input_files = [] dicts = [] dictLoaders = []
import scipy as sp

# Command-line layout used below:
#   argv[1] dictionary 1, argv[2] dictionary 2, argv[3] test signal,
#   argv[4] output file (opened for writing).

# Each input is read completely and its handle closed immediately; the names
# input_f1 / input_f2 / input_X stay bound (to closed files) afterwards.
with open(sys.argv[1]) as input_f1:
    dict1 = input_f1.read()

with open(sys.argv[2]) as input_f2:
    dict2 = input_f2.read()

with open(sys.argv[3]) as input_X:
    testSignal = input_X.read()

# The output file is left open for the statements that follow this excerpt.
output_f = open(sys.argv[4], 'w')

dictLoader1 = aL.arffLoader()
dictLoader1.load(dict1)

dictLoader2 = aL.arffLoader()
dictLoader2.load(dict2)

testLoader = aL.arffLoader()
testLoader.load(testSignal)

D1 = dictLoader1.fortranArray(dictLoader1.transactionContentList)
Created on 2014/7/3 @author: bhchen ''' import arffLoader as aL import sys import spams import time import numpy as np Ds = [] numDicts = len(sys.argv) - 2 for dictNo in range(numDicts): input_file = open(sys.argv[dictNo + 1]) dictContent = input_file.read() dictLoaders = aL.arffLoader() dictLoaders.load(dictContent) Ds.append(dictLoaders.fortranArray(dictLoaders.transactionContentList)) Ds[dictNo] = np.asfortranarray( Ds[dictNo] / np.tile(np.sqrt((Ds[dictNo] * Ds[dictNo]).sum(axis=0)), (Ds[dictNo].shape[0], 1))) input_X = open(sys.argv[len(sys.argv) - 1]) testSignal = input_X.read() testLoader = aL.arffLoader() testLoader.load(testSignal) X = testLoader.fortranArray(testLoader.transactionContentList) X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)), (X.shape[0], 1)))
def reChooseDict(instNo, currDict, currMean, weightWindowDsCurDict, currStdCompare, currMeanCompare, dicts, testDWindow, numAttrs, numInsts, outputCompare, Lambda):
    '''Score every dictionary on the recent window and return the best one.

    Every dictionary, including the current one, is run through
    ``spams.lasso`` on the column-normalised window.  For each instance the
    largest coefficient strictly below ``Lambda + 0.02`` is recorded; a value
    of exactly 0 or anything above 1 is stored as 1.  The dictionary with the
    highest mean score is selected, where the current dictionary competes with
    ``currMean`` rather than a recomputed mean.

    The function also relies on the module-level names ``aL``, ``spams``,
    ``np``, ``deque`` and ``outputPredictSparse``.

    Args:
        instNo: Instance number used to label the log lines; the first
            instance of the window is logged as ``instNo - len(testDWindow)``.
        currDict: Index of the dictionary currently in use.
        currMean: Mean score of the current dictionary used for the comparison.
        weightWindowDsCurDict: Score window of the current dictionary.  It is
            extended in place with the newly computed scores.
        currStdCompare: Returned unchanged when the current dictionary is kept.
        currMeanCompare: Returned unchanged when the current dictionary is kept.
        dicts: Sequence of dictionaries handed to ``spams.lasso``.
        testDWindow: Window of test data passed to ``fortranArrayPara``.
        numAttrs: Forwarded to ``fortranArrayPara``.
        numInsts: Forwarded to ``fortranArrayPara``.
        outputCompare: Writable object receiving the comparison log.
        Lambda: Threshold base; weights >= ``Lambda + 0.02`` are ignored.

    Returns:
        Tuple ``(chosen index, mean, std, score window of the chosen index)``.

    Raises:
        ValueError: If no weight of an instance passes the threshold filter
            (``max`` of an empty list).
    '''
    outputCompare.write('Re-Choose Dictionary!\n')
    outputPredictSparse.write('Re-Choose Dictionary!\n')

    numDicts = len(dicts)
    weightWindowDsRe = [deque() for _ in range(numDicts)]
    dictWeightMeanRe = {}

    # Build the window matrix and scale every column to unit L2 norm.
    testWindow = aL.arffLoader()
    testD = testWindow.fortranArrayPara(testDWindow, numAttrs, numInsts)
    testD = np.asfortranarray(testD / np.tile(np.sqrt((testD * testD).sum(axis=0)), (testD.shape[0], 1)))

    firstInstNo = instNo - len(testDWindow)
    weightLimit = Lambda + 0.02

    for dictNo in range(numDicts):
        if dictNo == currDict:
            # NOTE(review): the caller's deque is aliased here and then
            # appended to below, so the caller's window grows in place.
            # Looks deliberate (the skip was commented out) - confirm.
            weightWindowDsRe[dictNo] = weightWindowDsCurDict

        alpha_lasso_m1_Ds_batch = spams.lasso(testD, dicts[dictNo], return_reg_path=False, lambda1=1, pos=True, mode=0)
        for j in range(alpha_lasso_m1_Ds_batch.shape[1]):
            sparseText = str(alpha_lasso_m1_Ds_batch.getcol(j))
            outputCompare.write(str(firstInstNo + j) + '-D' + str(dictNo) + ':' + sparseText + '\n\n')

            # Each line of the sparse text is expected to look like
            # "(row, col)<whitespace>value"; only the value is needed here.
            weightTmp = []
            for line in sparseText.split('\n'):
                line = line.strip()
                weight = float(line.split(',')[1].split(')')[1].strip())
                if weight < weightLimit:
                    weightTmp.append(weight)

            maxWeight = max(weightTmp)
            # A weight of exactly 0.0 is treated as 1 (the original
            # "one page bug" workaround); scores are also capped at 1.
            if maxWeight == 0 or maxWeight > 1:
                maxWeight = 1
            weightWindowDsRe[dictNo].append(maxWeight)

    # Iterate over the dictionaries actually scored above (len(dicts)) rather
    # than a module-level count that may disagree with it.
    for dictNo in range(numDicts):
        if dictNo == currDict:
            dictWeightMeanRe[dictNo] = currMean
        else:
            dictWeightMeanRe[dictNo] = np.mean(weightWindowDsRe[dictNo])

    # Dictionary with the highest mean score, together with that mean.
    maxWeightDict, meanCompareRe = max(dictWeightMeanRe.items(), key=lambda x: x[1])

    if maxWeightDict == currDict:
        # Keeping the model: hand back the caller's previous statistics.
        meanCompareRe = currMeanCompare
        stdCompareRe = currStdCompare
        message = 'Keep same model' + str(maxWeightDict) + '!\n'
    else:
        stdCompareRe = np.std(weightWindowDsRe[maxWeightDict])
        message = 'Change model to model-' + str(maxWeightDict) + ',meanCompare:' + str(meanCompareRe) + ',stdCompare:' + str(stdCompareRe) + '!\n'
    outputCompare.write(message)
    outputPredictSparse.write(message)

    return (maxWeightDict, meanCompareRe, stdCompareRe, weightWindowDsRe[maxWeightDict])
Created on 2014/7/3 @author: bhchen ''' import arffLoader as aL import sys import spams import time import numpy as np Ds = [] numDicts = len(sys.argv)-2 for dictNo in range(numDicts): input_file = open(sys.argv[dictNo+1]) dictContent = input_file.read() dictLoaders = aL.arffLoader() dictLoaders.load(dictContent) Ds.append(dictLoaders.fortranArray(dictLoaders.transactionContentList)) Ds[dictNo] = np.asfortranarray(Ds[dictNo] / np.tile(np.sqrt((Ds[dictNo]*Ds[dictNo]).sum(axis=0)),(Ds[dictNo].shape[0],1))) input_X = open(sys.argv[len(sys.argv)-1]) testSignal = input_X.read() testLoader = aL.arffLoader() testLoader.load(testSignal) X = testLoader.fortranArray(testLoader.transactionContentList) X = np.asfortranarray(X / np.tile(np.sqrt((X*X).sum(axis=0)),(X.shape[0],1))) alpha1Lambda = 1 alpha_lasso_m1_Ds = []
@author: bhchen 待處理Bug: 1.第17.18筆會回傳錯誤訊息, weight為0但是似乎會選中正確的instance ''' import arffLoader as aL import sys import spams import time import numpy as np import math #test signal input_X = open(sys.argv[1]) testSignal = input_X.read() testLoader = aL.arffLoader() testLoader.load(testSignal) X = testLoader.fortranArray(testLoader.transactionContentList) print 'X:' print X X = np.asfortranarray(X / np.tile(np.sqrt((X*X).sum(axis=0)),(X.shape[0],1))) print 'normalize of X:' print X numOfDicts = len(sys.argv)-5 #Load dictionaries input_files = [] dicts = [] dictLoaders = [] Ds = []
def reChooseDict(instNo, currDict, currMean, currStd, dicts, testDWindow, numAttrs, numInsts, outputCompare, Lambda):
    '''Re-score the dictionaries by error on the recent window and pick one.

    Every dictionary except the current one is run through ``spams.lasso``
    (with ``lambda1=Lambda``) on the column-normalised window.  For each
    instance the coefficients are parsed into a ``{page: weight}`` mapping and
    handed to ``errorCaculation``; the resulting values form that dictionary's
    error window.  The current dictionary is not recomputed and competes with
    the ``currMean`` supplied by the caller.

    The function also relies on the module-level names ``aL``, ``spams``,
    ``np``, ``deque``, ``testLoader`` and ``errorCaculation``.

    Args:
        instNo: Instance number used to label the log lines; the first
            instance of the window is logged as ``instNo - len(testDWindow)``.
        currDict: Index of the dictionary currently in use.
        currMean: Mean error of the current dictionary.
        currStd: Error standard deviation of the current dictionary; returned
            when the current dictionary is selected.
        dicts: Sequence of dictionaries handed to ``spams.lasso``.
        testDWindow: Indexable window of test data.
        numAttrs: Forwarded to ``fortranArrayPara``.
        numInsts: Forwarded to ``fortranArrayPara``.
        outputCompare: Writable object receiving the comparison log.
        Lambda: Regularisation value passed to ``spams.lasso`` and
            ``errorCaculation``.

    Returns:
        Tuple ``(chosen index, its mean error, its error std)``.
    '''
    outputCompare.write('Re-Choose Dictionary!\n')

    numDicts = len(dicts)
    errorWindowDsRe = [deque() for _ in range(numDicts)]
    dictErrorMeanRe = {}

    # Build the window matrix and scale every column to unit L2 norm.
    testWindow = aL.arffLoader()
    testD = testWindow.fortranArrayPara(testDWindow, numAttrs, numInsts)
    testD = np.asfortranarray(testD / np.tile(np.sqrt((testD * testD).sum(axis=0)), (testD.shape[0], 1)))

    dictInfosRe = {}
    firstInstNo = instNo - len(testDWindow)

    for dictNo in range(numDicts):
        if dictNo == currDict:
            continue

        alpha_lasso_m1_Ds_batch = spams.lasso(testD, dicts[dictNo], return_reg_path=False, lambda1=Lambda, pos=True, mode=0)
        for j in range(alpha_lasso_m1_Ds_batch.shape[1]):
            # Single normalised test vector for instance j of the window.
            testX = testLoader.singleFortranArray(testDWindow[j])
            testX = np.asfortranarray(testX / np.tile(np.sqrt((testX * testX).sum(axis=0)), (testX.shape[0], 1)))

            dictInfosRe.clear()
            sparseText = str(alpha_lasso_m1_Ds_batch.getcol(j))
            outputCompare.write(str(firstInstNo + j) + '-D' + str(dictNo) + ':' + sparseText + '\n\n')

            # Each line of the sparse text is expected to look like
            # "(row, col)<whitespace>value"; row is the page, value its weight.
            for line in sparseText.split('\n'):
                line = line.strip()
                pageNo = int(line.split(',')[0].split('(')[1].strip())
                weight = float(line.split(',')[1].split(')')[1].strip())
                dictInfosRe[pageNo] = weight

            error = errorCaculation(testX, dicts[dictNo], dictInfosRe, Lambda)
            errorWindowDsRe[dictNo].append(error)

    # Use the windows computed above (previously discarded in favour of
    # out-of-scope names); the current dictionary was skipped, so its
    # caller-supplied mean stands in for it.
    for dictNo in range(numDicts):
        if dictNo == currDict:
            dictErrorMeanRe[dictNo] = currMean
        else:
            dictErrorMeanRe[dictNo] = np.mean(errorWindowDsRe[dictNo])

    # NOTE(review): this keeps the original max() selection, i.e. the
    # dictionary with the LARGEST mean error wins - confirm that min() was
    # not intended for an error-based criterion.
    maxWeightDict, meanCompareRe = max(dictErrorMeanRe.items(), key=lambda x: x[1])

    if maxWeightDict == currDict:
        # No window was recomputed for the current dictionary.
        stdCompareRe = currStd
    else:
        stdCompareRe = np.std(errorWindowDsRe[maxWeightDict])

    return maxWeightDict, meanCompareRe, stdCompareRe
'''
Load the dictionary file named by the first command-line argument through
arffLoader and print diagnostic information about it.

Created on 2014/3/18

@author: bhchen
'''
import arffLoader as aL
import sys
import spams
import time
import numpy as np

# Read the whole file up front; the handle is closed as soon as it is read.
with open(sys.argv[1]) as input_f1:
    dict1 = input_f1.read()

dictLoader1 = aL.arffLoader()
dictLoader1.load(dict1)

# Single-argument print(...) behaves identically under Python 2 and 3.
print(dictLoader1.classIndex)
print(dictLoader1.attrName)
print(dictLoader1.transactionContentList[0])
print(dictLoader1.numInstance)

D1 = dictLoader1.fortranArray(dictLoader1.transactionContentList)
print('D1:')
print(D1)
print(D1.shape[0])

# Sum of squares along axis 0 of D1.
tmp = (D1 * D1).sum(axis=0)
print('tmp:')
print(tmp)
print(tmp.shape[0])

# Square root of the sums above (reuses tmp instead of recomputing it).
notmp = np.sqrt(tmp)
print('notmp:')