コード例 #1
0
ファイル: experiment.py プロジェクト: zbw0046/CtripMining
 def generateTestUser(self, needNeighbor=False, needNeighborDim=False):
     """Populate the per-user experiment state for every user in the user set.

     For each user in ``usergeneration.userSet`` this method:

     * prepares the data and stores the train/test data in
       ``self.userTrainData`` / ``self.userTestData``;
     * loads all records of the user, splits them 70/30 by position, stores
       the last record of the leading part in
       ``self.lastRecordOfTrainingSet`` and appends the trailing records to
       ``self.realRecord`` (and ``record[11]`` of each to ``self.timestamp``);
     * optionally collects the feature lists of similar users.

     Args:
         needNeighbor: when true, concatenate the feature lists of
             ``usergeneration.getSimilarUsers(u)`` into
             ``self.featureListOfNeighbors[u]``.
         needNeighborDim: when true, build one concatenated feature list per
             profile dimension (via ``usergeneration.getSimilarUsersDim``)
             and store the list of lists in
             ``self.featureListOfNeighborsDim[u]``.

     Returns:
         Always ``0``.
     """
     self.userList = usergeneration.userSet
     nThreads = 4
     basicmining.profileDict = basicmining.generateProfilesMultiThread(
         usergeneration.userSet, nThreads)
     # Fraction of each user's records that forms the leading (training) part.
     proportion = 0.7
     for u in self.userList:
         print(u)
         datapreparation.prepareData(u)
         self.userTrainData[u] = datapreparation.getTraindata(u)
         self.userTestData[u] = datapreparation.getTestdata(u)
         # generate timestamps
         # should only use test set?
         # print(...) with a single argument behaves the same on Python 2 and 3.
         print('stage 1')
         allRecord = commonoperation.getAllRecordsofAPassengerFromDB(u)
         splitlingPoint = int(len(allRecord) * proportion)
         # NOTE(review): when splitlingPoint is 0 this reads allRecord[-1]
         # (the last record, or IndexError for an empty list) -- confirm that
         # every user is guaranteed to have enough records.
         self.lastRecordOfTrainingSet[u] = allRecord[splitlingPoint - 1]
         for record in allRecord[splitlingPoint:]:
             self.realRecord.setdefault(u, []).append(record)
             # presumably index 11 holds the record's timestamp -- verify
             # against the DB schema.
             self.timestamp.setdefault(u, []).append(record[11])
         print('stage 2')
         if needNeighbor:
             featuresListOfNeighbors = []
             for n in usergeneration.getSimilarUsers(u):
                 featuresListOfNeighbors += (
                     featureextraction.generateFeaturesList(n))
             self.featureListOfNeighbors[u] = featuresListOfNeighbors
         print('stage 3')
         if needNeighborDim:
             featuresListOfNeighborsDims = []
             # One neighbor feature list per profile dimension.
             for i in range(len(basicmining.getProfile(u))):
                 listTemp = []
                 for n in usergeneration.getSimilarUsersDim(u, i):
                     listTemp += featureextraction.generateFeaturesList(n)
                 featuresListOfNeighborsDims.append(listTemp)
             self.featureListOfNeighborsDim[u] = featuresListOfNeighborsDims
         print('stage 4')
     return 0
コード例 #2
0
from featureextraction import generateFeaturesList

# Sample passenger id used to exercise generateFeaturesList.
pid = 'MTEwMTA4MTk3OTEyMTg2MzE2'
results = generateFeaturesList(pid)
# Only show the first three entries of the generated feature list.
print(results[:3])
コード例 #3
0
ファイル: experiment.py プロジェクト: zbw0046/CtripMining
def modelUsers(users):  # experiment 1
    """Run the experiment-1 model comparison for each user in ``users``.

    Per user, the models are run in this order:

    1. on the user's own training data: ``GMMModel`` with 2 and 3
       components, then ``fKDEModel`` with the 'Silverman' bandwidth;
    2. on the user's training data concatenated with the feature lists of
       similar users: ``GMMModel`` (2 and 3 components) and ``fKDEModel``;
    3. ``mixfKDEModel`` with the neighbor features passed separately, once
       with 'Silverman' and once with 'cv_ml' bandwidths.
    """
    # users=usergeneration.generateSamplesOfActiveUsers(10000)
    for user in users:
        print(user)
        datapreparation.prepareData(user)
        ownTrain = datapreparation.getTraindata(user)
        ownTest = datapreparation.getTestdata(user)

        print('only train data')
        # GMM
        for name, components in (('GMM1_2', 2), ('GMM1_3', 3)):
            model = comparemethods.GMMModel(user,
                                            modelName=name,
                                            trainData=ownTrain,
                                            testData=ownTest)
            model.setVariables(nComponents=components)
            model.run()

        model = comparemethods.fKDEModel(user,
                                         modelName='fKDE1_Silverman',
                                         trainData=ownTrain,
                                         testData=ownTest)
        model.setVariables(bandwidth='Silverman')
        model.run()

        # Concatenate the feature lists of all similar users.
        neighborFeatures = []
        for neighbor in usergeneration.getSimilarUsers(user):
            neighborFeatures.extend(
                featureextraction.generateFeaturesList(neighbor))

        print('with others data')
        print('GMM')
        for name, components in (('GMM2_2', 2), ('GMM2_3', 3)):
            # A fresh combined list is built for every model, as before.
            model = comparemethods.GMMModel(user,
                                            modelName=name,
                                            trainData=ownTrain +
                                            neighborFeatures,
                                            testData=ownTest)
            model.setVariables(nComponents=components)
            model.run()

        print('fKDE2_Silverman')
        model = comparemethods.fKDEModel(user,
                                         modelName='fKDE2_Silverman',
                                         trainData=ownTrain +
                                         neighborFeatures,
                                         testData=ownTest)
        model.setVariables(bandwidth='Silverman')
        model.run()

        # The progress message of each mix-fKDE run equals its model name.
        for name, bw in (('mix-fKDE1_Silverman', 'Silverman'),
                         ('mix-fKDE2_cv', 'cv_ml')):
            print(name)
            model = comparemethods.mixfKDEModel(
                user,
                modelName=name,
                trainData=ownTrain,
                testData=ownTest,
                trainDataOfNeighbors=neighborFeatures)
            model.setVariables(bandwidth=bw, bandwidth1=bw)
            model.run()
コード例 #4
0
ファイル: experiment.py プロジェクト: zbw0046/CtripMining
def modelUsers3(users):
    """Compare mix-fKDE2_cv with different kernels.

    For each user, a ``mixfKDEModel`` is run once per kernel with both
    bandwidths set to 'cv_ml'. The model receives the user's own train/test
    data plus the concatenated feature lists of the user's similar users.
    """
    # (modelName, kernel) pairs, in the order the models are run.
    kernelRuns = (
        ('mix-fKDE2_cv_tophat', 'tophat'),
        ('mix-fKDE2_cv_epanechnikov', 'epanechnikov'),
        ('mix-fKDE2_cv_exponential', 'exponential'),
        ('mix-fKDE2_cv_linear', 'linear'),
        ('mix-fKDE2_cv_cosine', 'cosine'),
        ('mix-fKDE2_cv_gaussian', 'gaussian'),
    )
    for user in users:
        print(user)
        datapreparation.prepareData(user)
        # train,test data from this user
        ownTrain = datapreparation.getTraindata(user)
        ownTest = datapreparation.getTestdata(user)

        # feature from other users, to make mix KDE
        neighborFeatures = []
        for neighbor in usergeneration.getSimilarUsers(user):
            neighborFeatures.extend(
                featureextraction.generateFeaturesList(neighbor))

        for name, kernelName in kernelRuns:
            model = comparemethods.mixfKDEModel(
                user,
                modelName=name,
                trainData=ownTrain,
                testData=ownTest,
                trainDataOfNeighbors=neighborFeatures)
            model.setVariables(bandwidth='cv_ml',
                               bandwidth1='cv_ml',
                               kernel=kernelName)
            model.run()
コード例 #5
0
ファイル: experiment.py プロジェクト: zbw0046/CtripMining
def modelUsers2(users):  # experiment 2
    """Compare different methods with different bandwidths.

    Per user, the models are run in this order:

    1. on the user's own training data: ``GMMModel`` (2 and 3 components)
       and ``fKDEModel`` with bandwidths 'Silverman', 0.5, 1 and 1.5;
    2. on the user's training data concatenated with the feature lists of
       similar users: ``GMMModel`` (2 and 3 components) and ``fKDEModel``
       ('Silverman');
    3. ``mixfKDEModel`` with bandwidths 'Silverman', 0.5, 1, 1.5 and
       'cv_ml', the neighbor features being passed separately;
    4. ``mixfKDEModelDim`` with 'cv_ml' bandwidths and one neighbor feature
       list per profile dimension.
    """
    # users=usergeneration.generateSamplesOfActiveUsers(10000)
    for user in users:
        print(user)
        datapreparation.prepareData(user)
        ownTrain = datapreparation.getTraindata(user)
        ownTest = datapreparation.getTestdata(user)

        print('only train data')
        # GMM
        for name, components in (('GMM1_2', 2), ('GMM1_3', 3)):
            model = comparemethods.GMMModel(user,
                                            modelName=name,
                                            trainData=ownTrain,
                                            testData=ownTest)
            model.setVariables(nComponents=components)
            model.run()

        for name, bw in (('fKDE1_Silverman', 'Silverman'),
                         ('fKDE1_0.5', 0.5),
                         ('fKDE1_1', 1),
                         ('fKDE1_1.5', 1.5)):
            model = comparemethods.fKDEModel(user,
                                             modelName=name,
                                             trainData=ownTrain,
                                             testData=ownTest)
            model.setVariables(bandwidth=bw)
            model.run()

        # Concatenate the feature lists of all similar users.
        neighborFeatures = []
        for neighbor in usergeneration.getSimilarUsers(user):
            neighborFeatures.extend(
                featureextraction.generateFeaturesList(neighbor))

        # try another method: find neighbors on each dimension, each idx represents a dim
        neighborFeaturesPerDim = []
        for dim in range(len(basicmining.getProfile(user))):
            dimFeatures = []
            for neighbor in usergeneration.getSimilarUsersDim(user, dim):
                dimFeatures.extend(
                    featureextraction.generateFeaturesList(neighbor))
            neighborFeaturesPerDim.append(dimFeatures)

        print('with others data')
        for name, components in (('GMM2_2', 2), ('GMM2_3', 3)):
            # A fresh combined list is built for every model, as before.
            model = comparemethods.GMMModel(user,
                                            modelName=name,
                                            trainData=ownTrain +
                                            neighborFeatures,
                                            testData=ownTest)
            model.setVariables(nComponents=components)
            model.run()

        # print('fKDE2_Silverman')
        model = comparemethods.fKDEModel(user,
                                         modelName='fKDE2_Silverman',
                                         trainData=ownTrain +
                                         neighborFeatures,
                                         testData=ownTest)
        model.setVariables(bandwidth='Silverman')
        model.run()

        # print('mix-fKDE2_bw')
        for name, bw in (('mix-fKDE2_Silverman', 'Silverman'),
                         ('mix-fKDE2_0.5', 0.5),
                         ('mix-fKDE2_1', 1),
                         ('mix-fKDE2_1.5', 1.5),
                         ('mix-fKDE2_cv', 'cv_ml')):
            model = comparemethods.mixfKDEModel(
                user,
                modelName=name,
                trainData=ownTrain,
                testData=ownTest,
                trainDataOfNeighbors=neighborFeatures)
            model.setVariables(bandwidth=bw, bandwidth1=bw)
            model.run()

        # test set components according to the dim
        model = comparemethods.mixfKDEModelDim(
            user,
            modelName='mix-fKDE2_cv_moreComponents',
            trainData=ownTrain,
            testData=ownTest,
            trainDataOfNeighbors=neighborFeaturesPerDim)
        model.setVariables(bandwidth='cv_ml', bandwidth1='cv_ml')
        model.run()
コード例 #6
0
def prepareData(uId):
    """Make sure train and test data have been generated for ``uId``.

    Nothing happens when ``uId`` is already present in both
    ``traindataDict`` and ``testdataDict``. Otherwise the user's feature
    list is generated and handed to ``splitData`` (which presumably fills
    both dictionaries -- confirm against its definition).
    """
    if uId in traindataDict and uId in testdataDict:
        return
    splitData(uId, featureextraction.generateFeaturesList(uId))