Example #1
0
 def generateTestUser(self, needNeighbor=False, needNeighborDim=False):
     '''
     Populate the per-user test fixtures held on this object.

     Profiles for every user in ``usergeneration.userSet`` are generated
     first and stored in ``basicmining.profileDict``. Then, for each user:

     * train/test data from ``datapreparation`` are stored in
       ``self.userTrainData`` / ``self.userTestData``;
     * all records of the user are split at 70%: the record just before the
       split goes to ``self.lastRecordOfTrainingSet``, the records after it
       are appended to ``self.realRecord`` and field 11 of each of them to
       ``self.timestamp``;
     * optionally the feature lists of similar users are collected.

     needNeighbor    -- when true, store the concatenated feature lists of
                        the user's similar users in
                        ``self.featureListOfNeighbors``.
     needNeighborDim -- when true, store one concatenated feature list per
                        profile dimension in
                        ``self.featureListOfNeighborsDim``.

     Always returns 0.
     '''
     self.userList = usergeneration.userSet
     nThreads = 4
     basicmining.profileDict = basicmining.generateProfilesMultiThread(
         usergeneration.userSet, nThreads)
     for u in self.userList:
         print(u)
         datapreparation.prepareData(u)
         self.userTrainData[u] = datapreparation.getTraindata(u)
         self.userTestData[u] = datapreparation.getTestdata(u)
         # generate timestamps
         # should only use test set?
         print('stage 1')
         allRecord = commonoperation.getAllRecordsofAPassengerFromDB(u)
         proportion = 0.7
         splitlingPoint = int(len(allRecord) * proportion)
         # NOTE(review): when splitlingPoint is 0 (fewer than two records)
         # the index -1 wraps around to the last record, or raises
         # IndexError for an empty list -- confirm callers never hit this.
         self.lastRecordOfTrainingSet[u] = allRecord[splitlingPoint - 1]
         for record in allRecord[splitlingPoint:]:
             # Entries are only created once a held-out record exists.
             self.realRecord.setdefault(u, []).append(record)
             # presumably field 11 is the record's timestamp -- verify
             # against the DB schema
             self.timestamp.setdefault(u, []).append(record[11])
         print('stage 2')
         if needNeighbor:
             featuresListOfNeighbors = []
             for n in usergeneration.getSimilarUsers(u):
                 featuresListOfNeighbors += featureextraction.generateFeaturesList(
                     n)
             self.featureListOfNeighbors[u] = featuresListOfNeighbors
         print('stage 3')
         if needNeighborDim:
             # One feature list per profile dimension; index == dimension.
             featuresListOfNeighborsDims = []
             for i in range(len(basicmining.getProfile(u))):
                 listTemp = []
                 for n in usergeneration.getSimilarUsersDim(u, i):
                     listTemp += featureextraction.generateFeaturesList(n)
                 featuresListOfNeighborsDims.append(listTemp)
             self.featureListOfNeighborsDim[u] = featuresListOfNeighborsDims
         print('stage 4')
     return 0
Example #2
0
#                              DATA COLLECTION
# ___________________________________________________________________________

#NOTE: As stated above, when the parameters are given as arrays (e.g.
#      pvalCutOff = [.005, .01]), the program collects the data once per
#      entry so that the Aux + and Aux - values can be compared between runs.
#      HOWEVER: ONLY THE FINAL ITERATION WILL BE USED IN TENSORFLOW!

for i in range(0, len(catsToUse)):
    #Call 'datapreparation.py' and convert all of the matlab files into TensorFlow readable format
    df_total = dp.prepareData(numCells=numCells,
                              folderPath=folderPath,
                              dropCols=dropCols,
                              showHist=showHist,
                              catsToUse=catsToUse[i],
                              pvalCutOff=pvalCutOff[i],
                              numConsecPVal=numConsecPVal[i],
                              mustBeSecondHalf=mustBeSecondHalf[i],
                              showPrints=showPrints,
                              showBoxplot=showBoxplot,
                              checkLifetime=checkLifetime[i])

    #Separate (as a checkpoint) the aux + and aux - to describe them
    df_aux0 = df_total.loc[df_total['aux'].isin([0])]
    df_aux1 = df_total.loc[df_total['aux'].isin([1])]

    print("Aux + and - info:")
    print(df_aux0.describe())
    print(df_aux1.describe())

    print("RUN " + str(i) + ": Cats Used: " + str(catsToUse[i]) + " | pval: " +
Example #3
0
def modelUsers(users):  # experiment 1
    '''
    Experiment 1: build, configure and run the comparison models per user.

    Each user is first modelled on their own training data (GMM with 2 and
    3 components, fKDE with the 'Silverman' bandwidth). The feature lists
    of the user's similar users are then gathered and used either appended
    to the training data (GMM, fKDE) or passed separately to the mix-fKDE
    model ('Silverman' and 'cv_ml' bandwidths).
    '''
    # users=usergeneration.generateSamplesOfActiveUsers(10000)
    for user in users:
        print(user)
        datapreparation.prepareData(user)
        ownTrain = datapreparation.getTraindata(user)
        ownTest = datapreparation.getTestdata(user)

        print('only train data')
        # GMM
        for label, nComp in (('GMM1_2', 2), ('GMM1_3', 3)):
            gmm = comparemethods.GMMModel(user,
                                          modelName=label,
                                          trainData=ownTrain,
                                          testData=ownTest)
            gmm.setVariables(nComponents=nComp)
            gmm.run()

        kde = comparemethods.fKDEModel(user,
                                       modelName='fKDE1_Silverman',
                                       trainData=ownTrain,
                                       testData=ownTest)
        kde.setVariables(bandwidth='Silverman')
        kde.run()

        # Neighbour features are collected only after the own-data models
        # above have run, as in the original ordering.
        similarUsers = usergeneration.getSimilarUsers(user)
        neighborFeatures = []
        for other in similarUsers:
            neighborFeatures.extend(
                featureextraction.generateFeaturesList(other))

        print('with others data')
        print('GMM')
        for label, nComp in (('GMM2_2', 2), ('GMM2_3', 3)):
            # A fresh combined list is built for every model.
            gmm = comparemethods.GMMModel(user,
                                          modelName=label,
                                          trainData=ownTrain +
                                          neighborFeatures,
                                          testData=ownTest)
            gmm.setVariables(nComponents=nComp)
            gmm.run()

        print('fKDE2_Silverman')
        kde = comparemethods.fKDEModel(user,
                                       modelName='fKDE2_Silverman',
                                       trainData=ownTrain +
                                       neighborFeatures,
                                       testData=ownTest)
        kde.setVariables(bandwidth='Silverman')
        kde.run()

        # The printed banner is the same text as the model name.
        for label, bw in (('mix-fKDE1_Silverman', 'Silverman'),
                          ('mix-fKDE2_cv', 'cv_ml')):
            print(label)
            mixed = comparemethods.mixfKDEModel(
                user,
                modelName=label,
                trainData=ownTrain,
                testData=ownTest,
                trainDataOfNeighbors=neighborFeatures)
            mixed.setVariables(bandwidth=bw, bandwidth1=bw)
            mixed.run()
Example #4
0
def modelUsers3(users):
    '''
    Run the mix-fKDE model with 'cv_ml' bandwidths once per kernel.

    For every user the own train/test data and the concatenated feature
    lists of the user's similar users are prepared once; the model is then
    built, configured and run for each kernel in the order listed below.
    '''
    # (modelName, kernel) pairs, in execution order.
    kernelRuns = (
        ('mix-fKDE2_cv_tophat', 'tophat'),
        ('mix-fKDE2_cv_epanechnikov', 'epanechnikov'),
        ('mix-fKDE2_cv_exponential', 'exponential'),
        ('mix-fKDE2_cv_linear', 'linear'),
        ('mix-fKDE2_cv_cosine', 'cosine'),
        ('mix-fKDE2_cv_gaussian', 'gaussian'),
    )
    for user in users:
        print(user)
        datapreparation.prepareData(user)
        # train/test data of this user
        ownTrain = datapreparation.getTraindata(user)
        ownTest = datapreparation.getTestdata(user)

        # features of the other (similar) users, for the mixture part
        similarUsers = usergeneration.getSimilarUsers(user)
        neighborFeatures = []
        for other in similarUsers:
            neighborFeatures.extend(
                featureextraction.generateFeaturesList(other))

        for label, kernelName in kernelRuns:
            model = comparemethods.mixfKDEModel(
                user,
                modelName=label,
                trainData=ownTrain,
                testData=ownTest,
                trainDataOfNeighbors=neighborFeatures)
            model.setVariables(bandwidth='cv_ml',
                               bandwidth1='cv_ml',
                               kernel=kernelName)
            model.run()
Example #5
0
def modelUsers2(users):  # experiment 2
    '''
    Experiment 2: compare the methods under several bandwidth settings.

    Per user, in order: GMM (2 and 3 components) and fKDE ('Silverman',
    0.5, 1, 1.5) on the user's own training data; then, with the features
    of similar users, GMM and fKDE on the combined training data, mix-fKDE
    for 'Silverman', 0.5, 1, 1.5 and 'cv_ml', and finally the
    per-dimension mix-fKDE model with 'cv_ml'.
    '''
    # users=usergeneration.generateSamplesOfActiveUsers(10000)
    for user in users:
        print(user)
        datapreparation.prepareData(user)
        ownTrain = datapreparation.getTraindata(user)
        ownTest = datapreparation.getTestdata(user)

        print('only train data')
        # GMM
        for label, nComp in (('GMM1_2', 2), ('GMM1_3', 3)):
            gmm = comparemethods.GMMModel(user,
                                          modelName=label,
                                          trainData=ownTrain,
                                          testData=ownTest)
            gmm.setVariables(nComponents=nComp)
            gmm.run()

        for label, bw in (('fKDE1_Silverman', 'Silverman'),
                          ('fKDE1_0.5', 0.5),
                          ('fKDE1_1', 1),
                          ('fKDE1_1.5', 1.5)):
            kde = comparemethods.fKDEModel(user,
                                           modelName=label,
                                           trainData=ownTrain,
                                           testData=ownTest)
            kde.setVariables(bandwidth=bw)
            kde.run()

        similarUsers = usergeneration.getSimilarUsers(user)
        neighborFeatures = []
        for other in similarUsers:
            neighborFeatures.extend(
                featureextraction.generateFeaturesList(other))

        # try another method: find neighbors on each dimension, each idx
        # represents a dim
        neighborFeaturesPerDim = []
        for dim in range(len(basicmining.getProfile(user))):
            dimFeatures = []
            for other in usergeneration.getSimilarUsersDim(user, dim):
                dimFeatures.extend(
                    featureextraction.generateFeaturesList(other))
            neighborFeaturesPerDim.append(dimFeatures)

        print('with others data')
        for label, nComp in (('GMM2_2', 2), ('GMM2_3', 3)):
            # A fresh combined list is built for every model.
            gmm = comparemethods.GMMModel(user,
                                          modelName=label,
                                          trainData=ownTrain +
                                          neighborFeatures,
                                          testData=ownTest)
            gmm.setVariables(nComponents=nComp)
            gmm.run()

        kde = comparemethods.fKDEModel(user,
                                       modelName='fKDE2_Silverman',
                                       trainData=ownTrain +
                                       neighborFeatures,
                                       testData=ownTest)
        kde.setVariables(bandwidth='Silverman')
        kde.run()

        # mix-fKDE: the same value is used for both bandwidth settings.
        for label, bw in (('mix-fKDE2_Silverman', 'Silverman'),
                          ('mix-fKDE2_0.5', 0.5),
                          ('mix-fKDE2_1', 1),
                          ('mix-fKDE2_1.5', 1.5),
                          ('mix-fKDE2_cv', 'cv_ml')):
            mixed = comparemethods.mixfKDEModel(
                user,
                modelName=label,
                trainData=ownTrain,
                testData=ownTest,
                trainDataOfNeighbors=neighborFeatures)
            mixed.setVariables(bandwidth=bw, bandwidth1=bw)
            mixed.run()

        # test set components according to the dim
        perDim = comparemethods.mixfKDEModelDim(
            user,
            modelName='mix-fKDE2_cv_moreComponents',
            trainData=ownTrain,
            testData=ownTest,
            trainDataOfNeighbors=neighborFeaturesPerDim)
        perDim.setVariables(bandwidth='cv_ml', bandwidth1='cv_ml')
        perDim.run()