def generateTestUser(self, needNeighbor=False, needNeighborDim=False):
    '''
    Prepare the train/test data and the held-out records of every test user.

    For each user in ``usergeneration.userSet`` this fills
    ``self.userTrainData``, ``self.userTestData``,
    ``self.lastRecordOfTrainingSet``, ``self.realRecord`` and
    ``self.timestamp``. It also rebuilds ``basicmining.profileDict`` once,
    before the per-user loop.

    needNeighbor    -- when true, also store the concatenated feature lists
                       of the user's similar users in
                       ``self.featureListOfNeighbors[u]``.
    needNeighborDim -- when true, also store one concatenated feature list
                       per profile dimension in
                       ``self.featureListOfNeighborsDim[u]``.

    Always returns 0.
    '''
    self.userList = usergeneration.userSet
    nThreads = 4
    basicmining.profileDict = basicmining.generateProfilesMultiThread(
        usergeneration.userSet, nThreads)

    # Share of a user's records that counts as the training part; the
    # remainder is kept as the "real" records to compare against.
    proportion = 0.7

    for u in self.userList:
        print(u)
        datapreparation.prepareData(u)
        self.userTrainData[u] = datapreparation.getTraindata(u)
        self.userTestData[u] = datapreparation.getTestdata(u)

        # generate timestamps
        # should only use test set?
        print('stage 1')
        allRecord = commonoperation.getAllRecordsofAPassengerFromDB(u)
        splitlingPoint = int(len(allRecord) * proportion)
        # NOTE(review): with zero or one record splitlingPoint is 0, so the
        # index below is -1: that raises IndexError on an empty sequence and
        # otherwise wraps around to the final record, which then also lands
        # in the held-out part. Behaviour is left unchanged here; confirm
        # that every user is guaranteed to have at least two records.
        self.lastRecordOfTrainingSet[u] = allRecord[splitlingPoint - 1]
        for record in allRecord[splitlingPoint:]:
            # Entries are appended to, not replaced, when the user already
            # has held-out records stored on this object.
            if u in self.realRecord:
                self.realRecord[u].append(record)
                self.timestamp[u].append(record[11])
            else:
                self.realRecord[u] = [record]
                # Field 11 of a record is what gets stored as its timestamp.
                self.timestamp[u] = [record[11]]

        print('stage 2')
        if needNeighbor:
            featuresListOfNeighbors = []
            for n in usergeneration.getSimilarUsers(u):
                featuresListOfNeighbors += \
                    featureextraction.generateFeaturesList(n)
            self.featureListOfNeighbors[u] = featuresListOfNeighbors

        print('stage 3')
        if needNeighborDim:
            # One entry per profile dimension; entry i holds the features of
            # the users that are similar to u on dimension i.
            featuresListOfNeighborsDims = []
            for i in range(0, len(basicmining.getProfile(u))):
                listTemp = []
                for n in usergeneration.getSimilarUsersDim(u, i):
                    listTemp += featureextraction.generateFeaturesList(n)
                featuresListOfNeighborsDims.append(listTemp)
            self.featureListOfNeighborsDim[u] = featuresListOfNeighborsDims

        print('stage 4')
    return 0
# DATA COLLECTION # ___________________________________________________________________________ #NOTE: As stated above, by adding to the parameters with arrays such as # pvalCutOff (i.e. pvalCutOff = [.005, .01]), the program will collect the # data multiple times to compare Aux + and Aux - values. # HOWEVER: ONLY THE FINAL ITERATION WILL BE USED IN TENSORFLOW! for i in range(0, len(catsToUse)): #Call 'datapreparation.py' and convert all of the matlab files into TensorFlow readable format df_total = dp.prepareData(numCells=numCells, folderPath=folderPath, dropCols=dropCols, showHist=showHist, catsToUse=catsToUse[i], pvalCutOff=pvalCutOff[i], numConsecPVal=numConsecPVal[i], mustBeSecondHalf=mustBeSecondHalf[i], showPrints=showPrints, showBoxplot=showBoxplot, checkLifetime=checkLifetime[i]) #Separate (as a checkpoint) the aux + and aux - to describe them df_aux0 = df_total.loc[df_total['aux'].isin([0])] df_aux1 = df_total.loc[df_total['aux'].isin([1])] print("Aux + and - info:") print(df_aux0.describe()) print(df_aux1.describe()) print("RUN " + str(i) + ": Cats Used: " + str(catsToUse[i]) + " | pval: " +
def modelUsers(users):
    '''
    Experiment 1: run the GMM, fKDE and mix-fKDE models for every user.

    Each user is first modelled on their own training data only, then again
    with the features of their similar users added (either concatenated to
    the training data or passed separately to the mix-fKDE model).
    '''
    # Alternative input:
    # users=usergeneration.generateSamplesOfActiveUsers(10000)
    for u in users:
        print(u)
        datapreparation.prepareData(u)
        ownTrain = datapreparation.getTraindata(u)
        ownTest = datapreparation.getTestdata(u)

        print('only train data')
        # GMM with nComponents 2 and 3 on the user's own data.
        for nComp in (2, 3):
            gmm = comparemethods.GMMModel(
                u, modelName='GMM1_%d' % nComp,
                trainData=ownTrain, testData=ownTest)
            gmm.setVariables(nComponents=nComp)
            gmm.run()

        kde = comparemethods.fKDEModel(
            u, modelName='fKDE1_Silverman',
            trainData=ownTrain, testData=ownTest)
        kde.setVariables(bandwidth='Silverman')
        kde.run()

        # Features of all similar users, concatenated in neighbour order.
        neighbourFeatures = []
        for other in usergeneration.getSimilarUsers(u):
            neighbourFeatures.extend(
                featureextraction.generateFeaturesList(other))

        print('with others data')
        print('GMM')
        for nComp in (2, 3):
            # A fresh concatenated list is built for every model.
            pooledGmm = comparemethods.GMMModel(
                u, modelName='GMM2_%d' % nComp,
                trainData=ownTrain + neighbourFeatures, testData=ownTest)
            pooledGmm.setVariables(nComponents=nComp)
            pooledGmm.run()

        print('fKDE2_Silverman')
        pooledKde = comparemethods.fKDEModel(
            u, modelName='fKDE2_Silverman',
            trainData=ownTrain + neighbourFeatures, testData=ownTest)
        pooledKde.setVariables(bandwidth='Silverman')
        pooledKde.run()

        # The mix models keep the neighbour features separate from the
        # user's own training data; the label printed is the model name.
        mixRuns = (
            ('mix-fKDE1_Silverman', 'Silverman'),
            ('mix-fKDE2_cv', 'cv_ml'),
        )
        for label, bw in mixRuns:
            print(label)
            mixed = comparemethods.mixfKDEModel(
                u, modelName=label,
                trainData=ownTrain, testData=ownTest,
                trainDataOfNeighbors=neighbourFeatures)
            mixed.setVariables(bandwidth=bw, bandwidth1=bw)
            mixed.run()
def modelUsers3(users):
    '''
    Compare mix-fKDE2_cv with different kernels.

    For every user one mix-fKDE model is run per kernel, each with
    bandwidth and bandwidth1 set to 'cv_ml'.
    '''
    # Kernels in the order the models are run.
    kernelNames = ('tophat', 'epanechnikov', 'exponential',
                   'linear', 'cosine', 'gaussian')
    for u in users:
        print(u)
        datapreparation.prepareData(u)

        # train,test data from this user
        ownTrain = datapreparation.getTraindata(u)
        ownTest = datapreparation.getTestdata(u)

        # feature from other users, to make mix KDE
        neighbourFeatures = []
        for other in usergeneration.getSimilarUsers(u):
            neighbourFeatures.extend(
                featureextraction.generateFeaturesList(other))

        for kernelName in kernelNames:
            model = comparemethods.mixfKDEModel(
                u, modelName='mix-fKDE2_cv_' + kernelName,
                trainData=ownTrain, testData=ownTest,
                trainDataOfNeighbors=neighbourFeatures)
            model.setVariables(bandwidth='cv_ml', bandwidth1='cv_ml',
                               kernel=kernelName)
            model.run()
def modelUsers2(users):
    '''
    Experiment 2: compare different methods with different bandwidths.

    Every user is modelled on their own training data, then with the
    features of their similar users, and finally with one neighbour
    feature list per profile dimension.
    '''
    # Alternative input:
    # users=usergeneration.generateSamplesOfActiveUsers(10000)

    # (model name, bandwidth) pairs, in run order.
    ownKdeRuns = (
        ('fKDE1_Silverman', 'Silverman'),
        ('fKDE1_0.5', 0.5),
        ('fKDE1_1', 1),
        ('fKDE1_1.5', 1.5),
    )
    mixKdeRuns = (
        ('mix-fKDE2_Silverman', 'Silverman'),
        ('mix-fKDE2_0.5', 0.5),
        ('mix-fKDE2_1', 1),
        ('mix-fKDE2_1.5', 1.5),
        ('mix-fKDE2_cv', 'cv_ml'),
    )

    for u in users:
        print(u)
        datapreparation.prepareData(u)
        ownTrain = datapreparation.getTraindata(u)
        ownTest = datapreparation.getTestdata(u)

        print('only train data')
        # GMM with nComponents 2 and 3 on the user's own data.
        for nComp in (2, 3):
            gmm = comparemethods.GMMModel(
                u, modelName='GMM1_%d' % nComp,
                trainData=ownTrain, testData=ownTest)
            gmm.setVariables(nComponents=nComp)
            gmm.run()

        for label, bw in ownKdeRuns:
            kde = comparemethods.fKDEModel(
                u, modelName=label,
                trainData=ownTrain, testData=ownTest)
            kde.setVariables(bandwidth=bw)
            kde.run()

        # Features of all similar users, concatenated in neighbour order.
        neighbourFeatures = []
        for other in usergeneration.getSimilarUsers(u):
            neighbourFeatures.extend(
                featureextraction.generateFeaturesList(other))

        # try another method: find neighbors on each dimension, each idx
        # represents a dim
        perDimFeatures = []
        for dim in range(len(basicmining.getProfile(u))):
            dimFeatures = []
            for other in usergeneration.getSimilarUsersDim(u, dim):
                dimFeatures.extend(
                    featureextraction.generateFeaturesList(other))
            perDimFeatures.append(dimFeatures)

        print('with others data')
        for nComp in (2, 3):
            # A fresh concatenated list is built for every model.
            pooledGmm = comparemethods.GMMModel(
                u, modelName='GMM2_%d' % nComp,
                trainData=ownTrain + neighbourFeatures, testData=ownTest)
            pooledGmm.setVariables(nComponents=nComp)
            pooledGmm.run()

        pooledKde = comparemethods.fKDEModel(
            u, modelName='fKDE2_Silverman',
            trainData=ownTrain + neighbourFeatures, testData=ownTest)
        pooledKde.setVariables(bandwidth='Silverman')
        pooledKde.run()

        # The mix models keep the neighbour features separate from the
        # user's own training data; both bandwidths get the same value.
        for label, bw in mixKdeRuns:
            mixed = comparemethods.mixfKDEModel(
                u, modelName=label,
                trainData=ownTrain, testData=ownTest,
                trainDataOfNeighbors=neighbourFeatures)
            mixed.setVariables(bandwidth=bw, bandwidth1=bw)
            mixed.run()

        # test set components according to the dim
        perDimModel = comparemethods.mixfKDEModelDim(
            u, modelName='mix-fKDE2_cv_moreComponents',
            trainData=ownTrain, testData=ownTest,
            trainDataOfNeighbors=perDimFeatures)
        perDimModel.setVariables(bandwidth='cv_ml', bandwidth1='cv_ml')
        perDimModel.run()