def reChooseDict(instNo,currDict,currMean,weightWindowDsCurDict,currStdCompare,currMeanCompare,dicts,testDWindow,numAttrs,numInsts,outputCompare,Lambda):
    """Re-score every dictionary on the current window and pick the best one.

    Each dictionary other than ``currDict`` sparse-codes the window with
    ``spams.lasso``. For every instance, the largest coefficient below
    ``Lambda + 0.02`` is recorded; a result of 0 or above 1 is stored as 1.
    The dictionary with the highest mean recorded weight is selected.
    ``currDict`` competes with ``currMean`` rather than a recomputed mean.

    Args:
        instNo: Instance counter, used only to label log lines; column ``j``
            is labelled ``instNo - len(testDWindow) + j``.
        currDict: Index (into ``dicts``) of the dictionary currently in use.
        currMean: Mean weight used for ``currDict`` in the comparison.
        weightWindowDsCurDict: Weight window of ``currDict``; reused as-is.
        currStdCompare: Unused here; kept for interface compatibility.
        currMeanCompare: Unused here; kept for interface compatibility.
        dicts: Sequence of dictionaries handed to ``spams.lasso``.
        testDWindow: Window of test instances (must support ``len``).
        numAttrs: Forwarded to ``arffLoader.fortranArrayPara``.
        numInsts: Forwarded to ``arffLoader.fortranArrayPara``.
        outputCompare: Object with a ``write`` method, used for logging.
        Lambda: Weights at or above ``Lambda + 0.02`` are ignored.

    Returns:
        Tuple ``(chosen dict index, its mean weight, std of its weight
        window, its weight window)``.
    """
    outputCompare.write('Re-Choose Dictionary!\n')
    # NOTE(review): outputPredictSparse is not a parameter; it has to exist
    # as a module-level object with a write() method.
    outputPredictSparse.write('Re-Choose Dictionary!\n')

    numDicts = len(dicts)
    weightWindowDsRe = [deque() for _ in range(numDicts)]
    dictWeightMeanRe = {}

    testWindow = aL.arffLoader()
    testD = testWindow.fortranArrayPara(testDWindow,numAttrs,numInsts)
    # Divide every column by its Euclidean norm before sparse coding.
    testD = np.asfortranarray(testD / np.tile(np.sqrt((testD*testD).sum(axis=0)),(testD.shape[0],1)))

    firstInstNo = instNo - len(testDWindow)
    weightLimit = Lambda + 0.02

    for dictNo in range(numDicts):
        if dictNo == currDict:
            # The current dictionary keeps its existing window untouched.
            weightWindowDsRe[dictNo] = weightWindowDsCurDict
            continue
        alpha = spams.lasso(testD,dicts[dictNo],return_reg_path = False,lambda1 = 1,pos=True,mode=0)

        for j in range(alpha.shape[1]):
            column = alpha.getcol(j)
            outputCompare.write(str(firstInstNo+j)+'-D'+str(dictNo)+':'+str(column)+'\n\n')
            # Read the stored coefficients directly instead of parsing the
            # printed form of the sparse column: the printed form is empty
            # for an all-zero column and is not a stable format.
            weights = [float(w) for w in column.data if w < weightLimit]
            # No usable weight is treated like a zero weight, which the rule
            # below maps to 1 -- presumably what the "weight = 0.0" rule was
            # for; TODO confirm.
            maxWeight = max(weights) if weights else 0
            # one page bug: a weight of 0.0 is transformed to 1; cap at 1
            if maxWeight == 0 or maxWeight > 1:
                maxWeight = 1
            weightWindowDsRe[dictNo].append(maxWeight)

    # Choose the dictionary: the current one competes with the mean passed
    # in, every other one with the mean of its freshly computed window.
    for dictNo in range(numDicts):
        if dictNo == currDict:
            dictWeightMeanRe[dictNo] = currMean
        else:
            dictWeightMeanRe[dictNo] = np.mean(weightWindowDsRe[dictNo])
    # Dictionary with the largest mean, together with that mean.
    maxWeightDict,meanCompareRe = max(dictWeightMeanRe.items(), key=lambda x:x[1])

    stdCompareRe = np.std(weightWindowDsRe[maxWeightDict])
    if maxWeightDict == currDict:
        meanCompareRe = currMean
        message = 'Keep same model'+str(maxWeightDict)+'!\n'
    else:
        message = 'Change model to model-' + str(maxWeightDict) +',meanCompare:'+str(meanCompareRe)+',stdCompare:'+str(stdCompareRe)+'!\n'
    outputCompare.write(message)
    outputPredictSparse.write(message)

    return (maxWeightDict,meanCompareRe,stdCompareRe,weightWindowDsRe[maxWeightDict])
Esempio n. 2
0
#algowindow
# Floor division keeps the count an int on Python 2 and Python 3 alike.
numOfDicts = (len(sys.argv)-8) // 2

#Load dictionaries
input_files = []
dicts = []
dictLoaders = []
Ds = []
for dictNo in range(numOfDicts):
    # Dictionary paths are read from sys.argv[2] onwards; close each file
    # as soon as its content has been read.
    with open(sys.argv[dictNo+2]) as input_file:
        dictContent = input_file.read()
    dictLoaders.append(aL.arffLoader())
    dictLoaders[dictNo].load(dictContent)

    Ds.append(dictLoaders[dictNo].fortranArray(dictLoaders[dictNo].transactionContentList))
    # Divide every column by its Euclidean norm.
    Ds[dictNo] = np.asfortranarray(Ds[dictNo] / np.tile(np.sqrt((Ds[dictNo]*Ds[dictNo]).sum(axis=0)),(Ds[dictNo].shape[0],1)))

algoResults = []
#Load result of other algo
# NOTE(review): this loop only opens each file; nothing in the lines visible
# here reads or closes it -- confirm against the full script.
for i in range(numOfDicts):
    input_file = open(sys.argv[i+numOfDicts+2])
    return (maxWeightDict,meanCompareRe,stdCompareRe,weightWindowDsRe[maxWeightDict])
    
Alltic = time.time()
#algowindow
# Floor division keeps the count an int on Python 2 and Python 3 alike.
numOfDicts = (len(sys.argv)-12) // 2

#Load dictionaries
input_files = []
dicts = []
dictLoaders = []
Ds = []
for i in range(numOfDicts):
    # Dictionary paths are read from sys.argv[2] onwards. The handle is still
    # recorded in input_files, but it is closed once its content is read.
    with open(sys.argv[i+2]) as input_file:
        input_files.append(input_file)
        dicts.append(input_file.read())
    dictLoaders.append(aL.arffLoader())
    dictLoaders[i].load(dicts[i])
    Ds.append(dictLoaders[i].fortranArray(dictLoaders[i].transactionContentList))
    # Divide every column by its Euclidean norm.
    Ds[i] = np.asfortranarray(Ds[i] / np.tile(np.sqrt((Ds[i]*Ds[i]).sum(axis=0)),(Ds[i].shape[0],1)))

algoResults = []
#Load result of other algo
for i in range(numOfDicts):
    # Result paths follow the dictionary paths on the command line.
    with open(sys.argv[i+numOfDicts+2]) as input_file:
        result = input_file.read()
    algoResults.append(aL.arffLoader())
'''
Created on 2014/3/18

@author: bhchen
'''
import arffLoader as aL
import sys
import spams
import time
import numpy as np
# Read the dictionary file named by the first command-line argument and
# close it as soon as its content is in memory.
with open(sys.argv[1]) as input_f1:
    dict1 = input_f1.read()

dictLoader1 = aL.arffLoader()
dictLoader1.load(dict1)

# Single-argument print(...) behaves the same as the Python 2 print
# statement and as the Python 3 print function.
print(dictLoader1.classIndex)
print(dictLoader1.attrName)
print(dictLoader1.transactionContentList[0])
print(dictLoader1.numInstance)
D1 = dictLoader1.fortranArray(dictLoader1.transactionContentList)
print('D1:')
print(D1)
print(D1.shape[0])
# Sum over axis 0 of D1 * D1 (per-column sum of squares, assuming D1 is a
# plain ndarray -- TODO confirm what fortranArray returns).
tmp = (D1 * D1).sum(axis=0)
print('tmp:')
print(tmp)
print(tmp.shape[0])
# Square root of the sums above; reuses tmp instead of recomputing it.
notmp = np.sqrt(tmp)
print('notmp:')
@author: bhchen
Known open bug:
1. Instances 17 and 18 return an error message; the weight is 0, yet the correct instance still seems to be selected
'''
import arffLoader as aL
import sys
import spams
import time
import numpy as np
import math

#test signal
# The test-signal path is the first command-line argument; the file is
# closed as soon as its content has been read.
with open(sys.argv[1]) as input_X:
    testSignal = input_X.read()
testLoader = aL.arffLoader()
testLoader.load(testSignal)
X = testLoader.fortranArray(testLoader.transactionContentList)
# Single-argument print(...) behaves the same as the Python 2 print
# statement and as the Python 3 print function.
print('X:')
print(X)
# Divide every column of X by its Euclidean norm.
X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)),
                                  (X.shape[0], 1)))
print('normalize of X:')
print(X)

numOfDicts = len(sys.argv) - 5

#Load dictionaries
input_files = []
dicts = []
dictLoaders = []
Esempio n. 6
0
import scipy as sp

#print np.__version__
#print sp.__version__
# Each input file is closed as soon as its content has been read.
#dictionary 1
with open(sys.argv[1]) as input_f1:
    dict1 = input_f1.read()
#dictionary 2
with open(sys.argv[2]) as input_f2:
    dict2 = input_f2.read()
#test signal
with open(sys.argv[3]) as input_X:
    testSignal = input_X.read()
# Left open on purpose: nothing visible here writes to it, so it is
# presumably used by later code -- verify that it gets closed there.
output_f = open(sys.argv[4], 'w')

dictLoader1 = aL.arffLoader()
dictLoader1.load(dict1)

dictLoader2 = aL.arffLoader()
dictLoader2.load(dict2)

testLoader = aL.arffLoader()
testLoader.load(testSignal)

D1 = dictLoader1.fortranArray(dictLoader1.transactionContentList)
Esempio n. 7
0
Created on 2014/7/3

@author: bhchen
'''
import arffLoader as aL
import sys
import spams
import time
import numpy as np

Ds = []
# Every argument except the program name and the last one is a dictionary
# file; the last argument is the test signal.
numDicts = len(sys.argv) - 2
for dictNo in range(numDicts):
    # Close each dictionary file as soon as its content has been read.
    with open(sys.argv[dictNo + 1]) as input_file:
        dictContent = input_file.read()
    dictLoaders = aL.arffLoader()
    dictLoaders.load(dictContent)
    Ds.append(dictLoaders.fortranArray(dictLoaders.transactionContentList))
    # Divide every column by its Euclidean norm.
    Ds[dictNo] = np.asfortranarray(
        Ds[dictNo] / np.tile(np.sqrt((Ds[dictNo] * Ds[dictNo]).sum(axis=0)),
                             (Ds[dictNo].shape[0], 1)))

with open(sys.argv[-1]) as input_X:
    testSignal = input_X.read()
testLoader = aL.arffLoader()
testLoader.load(testSignal)

X = testLoader.fortranArray(testLoader.transactionContentList)
# Divide every column of X by its Euclidean norm.
X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)),
                                  (X.shape[0], 1)))
def reChooseDict(instNo, currDict, currMean, weightWindowDsCurDict,
                 currStdCompare, currMeanCompare, dicts, testDWindow, numAttrs,
                 numInsts, outputCompare, Lambda):
    """Re-score every dictionary on the current window and pick the best one.

    Every dictionary, including ``currDict``, sparse-codes the window with
    ``spams.lasso``. For every instance, the largest coefficient below
    ``Lambda + 0.02`` is recorded; a result of 0 or above 1 is stored as 1.
    The dictionary with the highest mean recorded weight is selected.
    ``currDict`` competes with ``currMean`` rather than a recomputed mean.

    Args:
        instNo: Instance counter, used only to label log lines; column ``j``
            is labelled ``instNo - len(testDWindow) + j``.
        currDict: Index (into ``dicts``) of the dictionary currently in use.
        currMean: Mean weight used for ``currDict`` in the comparison.
        weightWindowDsCurDict: Weight window of ``currDict``. It is extended
            in place with the weights recomputed here.
        currStdCompare: Returned unchanged when ``currDict`` is kept.
        currMeanCompare: Returned unchanged when ``currDict`` is kept.
        dicts: Sequence of dictionaries handed to ``spams.lasso``.
        testDWindow: Window of test instances (must support ``len``).
        numAttrs: Forwarded to ``arffLoader.fortranArrayPara``.
        numInsts: Forwarded to ``arffLoader.fortranArrayPara``.
        outputCompare: Object with a ``write`` method, used for logging.
        Lambda: Weights at or above ``Lambda + 0.02`` are ignored.

    Returns:
        Tuple ``(chosen dict index, mean, std, weight window of the chosen
        dict)``. When the current dictionary is kept, mean and std are
        ``currMeanCompare`` and ``currStdCompare``; otherwise they are the
        mean and standard deviation of the chosen dictionary's new window.
    """
    outputCompare.write('Re-Choose Dictionary!\n')
    # NOTE(review): outputPredictSparse is not a parameter; it has to exist
    # as a module-level object with a write() method.
    outputPredictSparse.write('Re-Choose Dictionary!\n')

    numDicts = len(dicts)
    weightWindowDsRe = [deque() for _ in range(numDicts)]
    dictWeightMeanRe = {}

    testWindow = aL.arffLoader()
    testD = testWindow.fortranArrayPara(testDWindow, numAttrs, numInsts)
    # Divide every column by its Euclidean norm before sparse coding.
    testD = np.asfortranarray(testD /
                              np.tile(np.sqrt((testD * testD).sum(axis=0)),
                                      (testD.shape[0], 1)))

    firstInstNo = instNo - len(testDWindow)
    weightLimit = Lambda + 0.02

    for dictNo in range(numDicts):
        if dictNo == currDict:
            # NOTE(review): the caller's deque is aliased, not copied, and
            # there is no `continue`, so the appends below grow the caller's
            # window -- confirm this is intended.
            weightWindowDsRe[dictNo] = weightWindowDsCurDict
        alpha = spams.lasso(testD,
                            dicts[dictNo],
                            return_reg_path=False,
                            lambda1=1,
                            pos=True,
                            mode=0)

        for j in range(alpha.shape[1]):
            column = alpha.getcol(j)
            outputCompare.write(
                str(firstInstNo + j) + '-D' + str(dictNo) + ':' +
                str(column) + '\n\n')
            # Read the stored coefficients directly instead of parsing the
            # printed form of the sparse column: the printed form is empty
            # for an all-zero column and is not a stable format.
            weights = [float(w) for w in column.data if w < weightLimit]
            # No usable weight is treated like a zero weight, which the rule
            # below maps to 1 -- presumably what the "weight = 0.0" rule was
            # for; TODO confirm.
            maxWeight = max(weights) if weights else 0
            # one page bug: a weight of 0.0 is transformed to 1; cap at 1
            if maxWeight == 0 or maxWeight > 1:
                maxWeight = 1
            weightWindowDsRe[dictNo].append(maxWeight)

    # Choose the dictionary: the current one competes with the mean passed
    # in, every other one with the mean of its freshly computed window.
    for dictNo in range(numDicts):
        if dictNo == currDict:
            dictWeightMeanRe[dictNo] = currMean
        else:
            dictWeightMeanRe[dictNo] = np.mean(weightWindowDsRe[dictNo])
    # Dictionary with the largest mean, together with that mean.
    maxWeightDict, meanCompareRe = max(dictWeightMeanRe.items(),
                                       key=lambda x: x[1])

    if maxWeightDict == currDict:
        # Keep the previous comparison mean and standard deviation.
        meanCompareRe = currMeanCompare
        stdCompareRe = currStdCompare
        message = 'Keep same model' + str(maxWeightDict) + '!\n'
    else:
        stdCompareRe = np.std(weightWindowDsRe[maxWeightDict])
        message = ('Change model to model-' + str(maxWeightDict) +
                   ',meanCompare:' + str(meanCompareRe) +
                   ',stdCompare:' + str(stdCompareRe) + '!\n')
    outputCompare.write(message)
    outputPredictSparse.write(message)

    return (maxWeightDict, meanCompareRe, stdCompareRe,
            weightWindowDsRe[maxWeightDict])
Created on 2014/7/3

@author: bhchen
'''
import arffLoader as aL
import sys
import spams
import time
import numpy as np

Ds = []
# Every argument except the program name and the last one is a dictionary
# file; the last argument is the test signal.
numDicts = len(sys.argv)-2
for dictNo in range(numDicts):
    # Close each dictionary file as soon as its content has been read.
    with open(sys.argv[dictNo+1]) as input_file:
        dictContent = input_file.read()
    dictLoaders = aL.arffLoader()
    dictLoaders.load(dictContent)
    Ds.append(dictLoaders.fortranArray(dictLoaders.transactionContentList))
    # Divide every column by its Euclidean norm.
    Ds[dictNo] = np.asfortranarray(Ds[dictNo] / np.tile(np.sqrt((Ds[dictNo]*Ds[dictNo]).sum(axis=0)),(Ds[dictNo].shape[0],1)))


with open(sys.argv[-1]) as input_X:
    testSignal = input_X.read()
testLoader = aL.arffLoader()
testLoader.load(testSignal)

X = testLoader.fortranArray(testLoader.transactionContentList)
# Divide every column of X by its Euclidean norm.
X = np.asfortranarray(X / np.tile(np.sqrt((X*X).sum(axis=0)),(X.shape[0],1)))

alpha1Lambda = 1
alpha_lasso_m1_Ds = []
@author: bhchen
Known open bug:
1. Instances 17 and 18 return an error message; the weight is 0, yet the correct instance still seems to be selected
'''
import arffLoader as aL
import sys
import spams
import time
import numpy as np
import math

#test signal
# The test-signal path is the first command-line argument; the file is
# closed as soon as its content has been read.
with open(sys.argv[1]) as input_X:
    testSignal = input_X.read()
testLoader = aL.arffLoader()
testLoader.load(testSignal)
X = testLoader.fortranArray(testLoader.transactionContentList)
# Single-argument print(...) behaves the same as the Python 2 print
# statement and as the Python 3 print function.
print('X:')
print(X)
# Divide every column of X by its Euclidean norm.
X = np.asfortranarray(X / np.tile(np.sqrt((X*X).sum(axis=0)),(X.shape[0],1)))
print('normalize of X:')
print(X)

numOfDicts = len(sys.argv)-5

#Load dictionaries
input_files = []
dicts = []
dictLoaders = []
Ds = []
Esempio n. 11
0
def reChooseDict(instNo,currDict,currMean,currStd,dicts,testDWindow,numAttrs,numInsts,outputCompare,Lambda):
    """Re-score every dictionary on the current window using coding errors.

    Each dictionary other than ``currDict`` sparse-codes the window with
    ``spams.lasso`` (``lambda1=Lambda``). For every instance the
    coefficients are passed to ``errorCaculation`` and the result is
    collected per dictionary. ``currDict`` competes with ``currMean``; every
    other dictionary competes with the mean of its collected errors. The
    dictionary with the largest mean is returned.

    Args:
        instNo: Instance counter, used only to label log lines; column ``j``
            is labelled ``instNo - len(testDWindow) + j``.
        currDict: Index (into ``dicts``) of the dictionary currently in use.
        currMean: Mean used for ``currDict`` in the comparison.
        currStd: Standard deviation returned when ``currDict`` is kept.
        dicts: Sequence of dictionaries handed to ``spams.lasso``.
        testDWindow: Indexable window of test instances.
        numAttrs: Forwarded to ``arffLoader.fortranArrayPara``.
        numInsts: Forwarded to ``arffLoader.fortranArrayPara``.
        outputCompare: Object with a ``write`` method, used for logging.
        Lambda: Regularisation value for ``spams.lasso``; also forwarded to
            ``errorCaculation``.

    Returns:
        Tuple ``(chosen dict index, its mean, its standard deviation)``.
    """
    outputCompare.write('Re-Choose Dictionary!\n')

    numDicts = len(dicts)
    errorWindowDsRe = [deque() for _ in range(numDicts)]
    dictErrorMeanRe = {}

    testWindow = aL.arffLoader()
    testD = testWindow.fortranArrayPara(testDWindow,numAttrs,numInsts)
    # Divide every column by its Euclidean norm before sparse coding.
    testD = np.asfortranarray(testD / np.tile(np.sqrt((testD*testD).sum(axis=0)),(testD.shape[0],1)))

    firstInstNo = instNo - len(testDWindow)

    for dictNo in range(numDicts):
        if dictNo == currDict:
            continue
        alpha = spams.lasso(testD,dicts[dictNo],return_reg_path = False,lambda1 = Lambda,pos=True,mode=0)

        for j in range(alpha.shape[1]):
            # NOTE(review): testLoader is a module-level loader, not the
            # local testWindow created above -- confirm which is intended.
            testX = testLoader.singleFortranArray(testDWindow[j])
            testX = np.asfortranarray(testX / np.tile(np.sqrt((testX*testX).sum(axis=0)),(testX.shape[0],1)))

            column = alpha.getcol(j)
            outputCompare.write(str(firstInstNo+j)+'-D'+str(dictNo)+':'+str(column)+'\n\n')

            # Map row index -> coefficient straight from the sparse storage
            # instead of parsing the printed form of the column: the printed
            # form is empty for an all-zero column and is not a stable format.
            entries = column.tocoo()
            dictInfosRe = dict((int(pageNo), float(weight))
                               for pageNo, weight in zip(entries.row, entries.data))

            error = errorCaculation(testX, dicts[dictNo], dictInfosRe, Lambda)
            errorWindowDsRe[dictNo].append(error)

    # Choose the dictionary from the windows computed above. The current
    # dictionary was skipped, so it competes with the mean passed in.
    for dictNo in range(numDicts):
        if dictNo == currDict:
            dictErrorMeanRe[dictNo] = currMean
        else:
            dictErrorMeanRe[dictNo] = np.mean(errorWindowDsRe[dictNo])
    # NOTE(review): this selects the LARGEST mean of a quantity called
    # "error"; whether larger is better depends on errorCaculation -- confirm.
    maxWeightDict,meanCompareRe = max(dictErrorMeanRe.items(), key=lambda x:x[1])

    if maxWeightDict == currDict:
        # No window was recomputed for the current dictionary.
        stdCompareRe = currStd
    else:
        stdCompareRe = np.std(errorWindowDsRe[maxWeightDict])

    return maxWeightDict,meanCompareRe,stdCompareRe
'''
Created on 2014/3/18

@author: bhchen
'''
import arffLoader as aL
import sys
import spams
import time
import numpy as np
# Read the dictionary file named by the first command-line argument and
# close it as soon as its content is in memory.
with open(sys.argv[1]) as input_f1:
    dict1 = input_f1.read()

dictLoader1 = aL.arffLoader()
dictLoader1.load(dict1)

# Single-argument print(...) behaves the same as the Python 2 print
# statement and as the Python 3 print function.
print(dictLoader1.classIndex)
print(dictLoader1.attrName)
print(dictLoader1.transactionContentList[0])
print(dictLoader1.numInstance)
D1 = dictLoader1.fortranArray(dictLoader1.transactionContentList)
print('D1:')
print(D1)
print(D1.shape[0])
# Sum over axis 0 of D1 * D1 (per-column sum of squares, assuming D1 is a
# plain ndarray -- TODO confirm what fortranArray returns).
tmp = (D1*D1).sum(axis=0)
print('tmp:')
print(tmp)
print(tmp.shape[0])
# Square root of the sums above; reuses tmp instead of recomputing it.
notmp = np.sqrt(tmp)
print('notmp:')