def calculate_bluetooth_sim(user1, user2):
    """Collect the existing ``Processed_bluetooth.txt`` files of two users.

    Every sub-folder that ``GetDirName().printPath`` reports under
    ``<parent of cwd>/starlog/<user>`` is probed for a
    ``Processed_bluetooth.txt`` file; the paths that exist are gathered
    per user.

    NOTE(review): this block neither computes nor returns a similarity
    (it implicitly returns ``None``); the gathered lists are presumably
    input for a comparison step that is not part of this block -- confirm.

    :param user1: folder name of the first user under ``starlog``.
    :param user2: folder name of the second user under ``starlog``.
    """
    UserFile1 = []
    UserFile2 = []
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog = parent_path + os.sep + "starlog"
    user1Floder = getdir.printPath(starlog + os.sep + user1)
    user2Floder = getdir.printPath(starlog + os.sep + user2)
    user1filepath = starlog + os.sep + os.sep + user1 + os.sep
    user2filepath = starlog + os.sep + os.sep + user2 + os.sep
    try:
        for folder in user1Floder:
            u1file = user1filepath + folder + os.sep + 'Processed_bluetooth.txt'
            if os.path.exists(u1file):
                UserFile1.append(u1file)

        # Walk user2's own folder list. Bounding this loop by the length of
        # user1's list either raised IndexError (user2 has fewer folders)
        # or silently ignored user2's extra folders.
        for folder in user2Floder:
            u2file = user2filepath + folder + os.sep + 'Processed_bluetooth.txt'
            if os.path.exists(u2file):
                UserFile2.append(u2file)
    except Exception as e:
        # Best-effort: report the problem and carry on, as before.
        print(e)
        traceback.print_exc()
def drawfenkai(filename):
    """Draw one picture per folder group returned by ``getUserFiles``.

    For every folder in a group, ``filename`` is loaded from that folder
    (columns 0, 1 and 2, comma separated, as strings). The date part of
    column 1 in the second loaded row is used as the reference passed to
    ``canculateDate``. Each row becomes
    ``[canculateDate(col1, ref), canculateDate(col2, ref), col0]``.
    After a group has been read, the path of its last file is printed and
    passed to ``drewPic`` together with the rows collected for the group.

    :param filename: name of the data file expected in every folder.
    """
    from getDir import GetDirName
    import os
    lister = GetDirName()
    root = os.path.dirname(os.getcwd())
    _user_files, _all_files, groups = lister.getUserFiles(root + '\\' + 'starlog')

    for group in groups:
        pictures = []
        for folder in group:
            path_file_name = root + folder + os.sep + filename
            rows = np.loadtxt(path_file_name,
                              dtype=str,
                              delimiter=',',
                              usecols=(0, 1, 2),
                              unpack=False)
            reference = rows[1][1].split(' ')[0]
            pictures.append([
                [canculateDate(row[1], reference),
                 canculateDate(row[2], reference),
                 row[0]]
                for row in rows
            ])
        # path_file_name still holds the last file of this group here.
        print(path_file_name)
        drewPic(pictures, path_file_name)
def drawfenkai(filename):
    """Draw one picture per folder group returned by ``getUserFiles``.

    For every folder in a group, ``filename`` is loaded from that folder
    (columns 0, 1 and 2, comma separated, as strings). The date part of
    column 1 in the second loaded row is used as the reference passed to
    ``canculateDate``. Each row becomes
    ``[canculateDate(col1, ref), canculateDate(col2, ref), col0]``.
    After a group has been read, the path of its last file is printed and
    passed to ``drewPic`` together with the rows collected for the group.

    :param filename: name of the data file expected in every folder.
    """
    from getDir import GetDirName
    import os

    lister = GetDirName()
    root = os.path.dirname(os.getcwd())
    _user_files, _all_files, groups = lister.getUserFiles(root + "\\" + "starlog")

    for group in groups:
        pictures = []
        for folder in group:
            path_file_name = root + folder + os.sep + filename
            rows = np.loadtxt(path_file_name, dtype=str, delimiter=",", usecols=(0, 1, 2), unpack=False)
            reference = rows[1][1].split(" ")[0]
            day_rows = []
            for row in rows:
                begin = canculateDate(row[1], reference)
                end = canculateDate(row[2], reference)
                day_rows.append([begin, end, row[0]])
            pictures.append(day_rows)
        # path_file_name still holds the last file of this group here.
        print(path_file_name)
        drewPic(pictures, path_file_name)
def create_model(labelFileName='RClabelTime.txt', num_topics=24):
    """Train a TF-IDF + LDA model from the per-folder label files and save it.

    One "sentence" is built per label file. Each record contributes the word
    ``<label>_<date>_<time2hour(time)>_<duration // 600>``, taken from
    columns 0 (label), 1 (start time, ``"<date> <time>"``) and 3 (duration).
    The dictionary, TF-IDF model and LDA model are saved under
    ``.\\LDA_all_27\\`` with the topic count in the file name.

    :param labelFileName: name of the label file read from every folder
        listed by ``getUserFiles``.
    :param num_topics: number of LDA topics, also used in the saved file
        names. Defaults to 24, the value that used to be hard-coded.
    :return: ``(lda, dic, tfidf, train_set, AllUserFiles)``.
    """
    from getDir import GetDirName
    import os
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + '\\' + 'starlog')

    FileName = []
    for path_file in other:
        for folder in path_file:
            FileName.append(parent_path + folder + os.sep + labelFileName)

    train_set = []
    # Each file is the data of one day; one pass of this loop handles one day.
    for day_file in FileName:
        # ndmin=2 keeps a single-record file as one row; without it loadtxt
        # squeezes the result to 1-D and row indexing would pick characters.
        ondaydata = np.loadtxt(day_file, dtype=str, delimiter=',',
                               usecols=(0, 1, 3), ndmin=2)
        tempsentence = []
        for label, start, duration in ondaydata:
            parts = start.split(' ')
            word = (label + '_' + parts[0] + '_' + time2hour(parts[1]) +
                    '_' + str(int(duration) // (10 * 60)))
            tempsentence.append(word)
        train_set.append(tempsentence)

    dic = corpora.Dictionary(train_set)
    corpus = [dic.doc2bow(text) for text in train_set]
    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]
    lda = models.LdaModel(corpus_tfidf, id2word=dic, num_topics=num_topics)
    lda.save(".\\LDA_all_27\\SemanticLda" + str(num_topics) + ".txt")
    dic.save(".\\LDA_all_27\\SemanticDic" + str(num_topics) + ".txt")
    tfidf.save(".\\LDA_all_27\\SemanticTFIDF" + str(num_topics) + ".txt")

    return lda, dic, tfidf, train_set, AllUserFiles
def getAprioriItem(minSupport=0.5, minConfidence=0.8):
    """Run Apriori on every ``appriori_sequence.csv`` and save the item sets.

    The files are looked up two folder levels below
    ``<parent of cwd>/starlog``. For each of them, the item sets returned by
    ``runApriori`` are sorted by ascending support, and every set with more
    than one element is written as one comma-separated line to
    ``sequencefrequence.txt`` in the same folder.

    :param minSupport: minimum support passed to ``runApriori``.
    :param minConfidence: minimum confidence passed to ``runApriori``.
    """
    getdir = GetDirName()
    starlog = os.path.dirname(os.getcwd()) + os.sep + "starlog"

    Fulldirlist = []
    for user_dir in getdir.printPath(starlog):
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep +
                               day_dir + os.sep + 'appriori_sequence.csv')

    for infile in Fulldirlist:  # infile is the path of appriori_sequence.csv
        inFile = dataFromFile(infile)
        items, rules = runApriori(inFile, minSupport, minConfidence)
        out_path = infile.replace('appriori_sequence.csv',
                                  'sequencefrequence.txt')
        # The context manager closes (and flushes) the result file, which
        # was previously left open.
        with open(out_path, 'w') as savefile:
            # pair is (item, support); indexing replaces the tuple-parameter
            # lambda, which is a syntax error on Python 3.
            for item, support in sorted(items, key=lambda pair: pair[1]):
                seitem = list(item)
                if len(seitem) > 1:
                    savefile.write(','.join(seitem))
                    savefile.write('\n')
def getAll_label2dic():
    """Build and pickle a mapping from every label tag to a string index.

    Column 4 of every ``RCed_stoppoint.txt`` found two folder levels below
    ``<parent of cwd>/starlog`` is read and the distinct tags are collected.
    The resulting dictionary (tag -> ``'0'``, ``'1'``, ...) is meant for
    translating between label tags and item ids when working with frequent
    item sets. It is written to ``alllabelDic.pkl`` in the current
    directory.

    :return: ``(allLabelSet, allLabelDic)`` -- the sorted list of distinct
        tags and the tag -> index-string dictionary.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    print(parent_path)
    starlog = parent_path + os.sep + "starlog"
    dirlist = getdir.printPath(starlog)
    print(dirlist)

    # Paths of all per-day label files.
    Fulldirlist = []
    for user_dir in dirlist:
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep +
                               day_dir + os.sep + 'RCed_stoppoint.txt')

    distinct_tags = set()
    for filePath in Fulldirlist:
        # ndmin=1: a file with a single record would otherwise be squeezed
        # to a 0-d array, which cannot be iterated.
        labelTag = np.loadtxt(filePath, dtype=str, delimiter=',',
                              usecols=(4,), ndmin=1)
        distinct_tags.update(labelTag)

    # Sorted so the index given to each tag does not depend on set order.
    allLabelSet = sorted(distinct_tags)
    allLabelDic = {}
    for index, tag in enumerate(allLabelSet):
        allLabelDic[tag] = str(index)

    with open('alllabelDic.pkl', 'wb') as output:
        pickle.dump(allLabelDic, output)
    return allLabelSet, allLabelDic
def calculate_user_sim_onBluetooth():
    """Write the bluetooth similarity of user pairs to ``ans``.

    A user is taken into account when at least one of the folders listed by
    ``getUserFiles`` contains a ``bluetooth.txt``. The user name is the
    third path component from the end of that file's path. For every pair,
    one line ``<user_i>,<user_j>,<result>`` is written to the module-level
    ``ans`` object.

    NOTE(review): ``j`` starts at ``i`` while ``i`` stops before the last
    user, so every user except the last is also paired with itself --
    confirm that this is intended.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + os.sep +
                                                        'starlog')
    found_users = set()
    for path_file in other:
        for folder in path_file:
            path_file_name = parent_path + folder + os.sep + 'bluetooth.txt'
            if os.path.exists(path_file_name):
                found_users.add(path_file_name.split(os.sep)[-3])

    userlist = list(found_users)
    # print() with a single argument behaves the same on Python 2 and 3.
    print(userlist)
    for i in range(len(userlist) - 1):
        for j in range(i, len(userlist)):
            print('%s %s' % (userlist[i], userlist[j]))
            result = calculate_bluetooth_sim(userlist[i], userlist[j])
            ans.write(','.join([userlist[i], userlist[j], str(result)]))
            ans.write('\n')
def GetLabelFile():
    """Return one ``semanticGPS_stoppoint.txt`` path per entry that
    ``printPath`` lists under ``<parent of cwd>\\GPS_Get_PreProcesser``."""
    lister = GetDirName()
    base = os.path.dirname(os.getcwd()) + "\\GPS_Get_PreProcesser"
    return [base + "\\" + entry + "\\" + 'semanticGPS_stoppoint.txt'
            for entry in lister.printPath(base)]
Beispiel #9
0
def GetLabelFile():
    """Return one ``semanticGPS_stoppoint.txt`` path per entry that
    ``printPath`` lists under ``<parent of cwd>\\GPS_Get_PreProcesser``."""
    lister = GetDirName()
    base = os.path.dirname(os.getcwd()) + "\\GPS_Get_PreProcesser"
    label_files = []
    for entry in lister.printPath(base):
        label_files.append(base + "\\" + entry + "\\" + 'semanticGPS_stoppoint.txt')
    return label_files
def getfullfilepath():
    """Return one ``locationGPS.txt`` path per entry that ``printPath``
    lists under ``<parent of cwd>\\GPS_Get_PreProcesser``."""
    lister = GetDirName()
    base = os.path.dirname(os.getcwd()) + "\\GPS_Get_PreProcesser"
    return [base + "\\" + entry + "\\" + "locationGPS.txt"
            for entry in lister.printPath(base)]
def get_fenlei_user():
    """Return the ``wifi.txt`` path of every folder listed by
    ``getUserFiles`` under ``starlog`` (existence is not checked)."""
    lister = GetDirName()
    root = os.path.dirname(os.getcwd())
    _user_files, _all_files, groups = lister.getUserFiles(root + os.sep + 'starlog')
    return [root + folder + os.sep + 'wifi.txt'
            for group in groups
            for folder in group]
Beispiel #12
0
def getfullfilepath():
    """Return one ``locationGPS.txt`` path per entry that ``printPath``
    lists under ``<parent of cwd>\\GPS_Get_PreProcesser``."""
    lister = GetDirName()
    base = os.path.dirname(os.getcwd()) + "\\GPS_Get_PreProcesser"
    gps_files = []
    for entry in lister.printPath(base):
        gps_files.append(base + "\\" + entry + "\\" + 'locationGPS.txt')
    return gps_files
Beispiel #13
0
def GetSemanticGPSpath():
    """Return one ``semanticGPS.txt`` path per entry that ``printPath``
    lists under ``<parent of cwd>/GPS_Get_PreProcesser``.

    ``semanticGPS.txt`` is the processed GPS file with time and label
    information added.
    """
    lister = GetDirName()
    base = os.path.dirname(os.getcwd()) + os.sep + "GPS_Get_PreProcesser"
    return [base + os.sep + entry + os.sep + 'semanticGPS.txt'
            for entry in lister.printPath(base)]
def get_fenlei_user():
    """Return the paths of the ``bluetooth.txt`` files that exist in the
    folders listed by ``getUserFiles`` under ``starlog``."""
    lister = GetDirName()
    root = os.path.dirname(os.getcwd())
    _user_files, _all_files, groups = lister.getUserFiles(root + os.sep + "starlog")
    candidates = (root + folder + os.sep + "bluetooth.txt"
                  for group in groups
                  for folder in group)
    # Trans_btoothjson_txt(path) was once run here on first use to create
    # the bluetooth csv file; that call is disabled.
    return [path for path in candidates if os.path.exists(path)]
Beispiel #15
0
def getUserFloderList(user0, user1):
    """Hand the sorted folder lists of two users to ``simHashLabel``.

    The folder lists come from ``printPath`` and are sorted in place. The
    number of folders passed on is the smaller of the two list lengths.

    :param user0: folder name of the first user under ``starlog``.
    :param user1: folder name of the second user under ``starlog``.
    :return: whatever ``simHashLabel`` returns.
    """
    lister = GetDirName()
    starlog = os.path.dirname(os.getcwd()) + os.sep + "starlog"
    first_days = lister.printPath(starlog + os.sep + user0)
    second_days = lister.printPath(starlog + os.sep + user1)
    first_prefix = starlog + os.sep + os.sep + user0 + os.sep
    second_prefix = starlog + os.sep + os.sep + user1 + os.sep
    shared_count = min(len(first_days), len(second_days))
    first_days.sort()
    second_days.sort()
    return simHashLabel(first_prefix, second_prefix,
                        first_days, second_days, shared_count)
def getUserFloderpath():
    """Return the entries ``printPath`` lists under ``<parent of cwd>/starlog``.

    Only the listed entries themselves are returned; no full paths are
    built. The list of joined paths that used to be assembled here was
    never used or returned, so that dead work is gone.

    :return: the value returned by ``GetDirName().printPath``.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    return getdir.printPath(parent_path + os.sep + "starlog")
def getUserFloderpath():
    """Return the entries ``printPath`` lists under ``<parent of cwd>/starlog``.

    Only the listed entries themselves are returned; no full paths are
    built. The list of joined paths that used to be assembled here was
    never used or returned, so that dead work is gone.

    :return: the value returned by ``GetDirName().printPath``.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    return getdir.printPath(parent_path + os.sep + "starlog")
def canclulate_wifi(user1, user2):
    """Sum ``daily_wifi_sim`` over the index-aligned folders of two users.

    The folder lists come from ``printPath``. Folders are paired by
    position, and only as many pairs as the shorter list provides are
    compared, each through its ``Downwifi.txt`` file.

    :param user1: folder name of the first user under ``starlog``.
    :param user2: folder name of the second user under ``starlog``.
    :return: the accumulated similarity, starting from ``0.0``.
    """
    lister = GetDirName()
    starlog = os.path.dirname(os.getcwd()) + os.sep + "starlog"
    days_a = lister.printPath(starlog + os.sep + user1)
    days_b = lister.printPath(starlog + os.sep + user2)
    prefix_a = starlog + os.sep + os.sep + user1 + os.sep
    prefix_b = starlog + os.sep + os.sep + user2 + os.sep

    total = 0.0
    # zip stops at the shorter list, i.e. after min(len) pairs.
    for day_a, day_b in zip(days_a, days_b):
        total += daily_wifi_sim(prefix_a + day_a + os.sep + "Downwifi.txt",
                                prefix_b + day_b + os.sep + "Downwifi.txt")
    return total
Beispiel #19
0
def canclulate_wifi(user1, user2):
    """Sum ``daily_wifi_sim`` over the index-aligned folders of two users.

    The folder lists come from ``printPath``. Folders are paired by
    position, and only as many pairs as the shorter list provides are
    compared, each through its ``Downwifi.txt`` file.

    :param user1: folder name of the first user under ``starlog``.
    :param user2: folder name of the second user under ``starlog``.
    :return: the accumulated similarity, starting from ``0.0``.
    """
    lister = GetDirName()
    starlog = os.path.dirname(os.getcwd()) + os.sep + "starlog"
    days_a = lister.printPath(starlog + os.sep + user1)
    days_b = lister.printPath(starlog + os.sep + user2)
    prefix_a = starlog + os.sep + os.sep + user1 + os.sep
    prefix_b = starlog + os.sep + os.sep + user2 + os.sep

    total = 0.0
    for index in range(min(len(days_a), len(days_b))):
        file_a = prefix_a + days_a[index] + os.sep + 'Downwifi.txt'
        file_b = prefix_b + days_b[index] + os.sep + 'Downwifi.txt'
        total = total + daily_wifi_sim(file_a, file_b)
    return total
def mian():
    """Detect and label stay points for all known ``location.txt`` files.

    Two sets of files are handled the same way: first the ``location.txt``
    next to every path returned by ``getfullfilepath``, then the
    ``location.txt`` in every folder listed by ``getUserFiles`` under
    ``starlog`` (the folders of the categorized users).

    A file for which ``getStayPoint`` returns no labels is reported and
    skipped; duplicating the last label would raise ``IndexError`` on an
    empty list.
    """
    from Semantics_of_Trajectories import Calculate_semantic_of_point
    from stop_points import getfullfilepath

    def _label_stay_points(location_file, shown_name):
        # Run stay-point detection on one file and store the labeled result.
        gps_data, timestamp, accur = get_data(location_file)
        labels, SP = getStayPoint(gps_data, timestamp, disthreshold=90, timethreshold=180)
        print(shown_name)
        if len(labels) == 0:
            # labels can be empty; there is then no last label to repeat.
            print('no stay-point labels for %s, skipped' % location_file)
            return
        # The last label is repeated once, so labels grows by one element.
        labels.append(labels[-1])
        labDIC = {}
        for index, point in enumerate(SP):
            labDIC[index] = Calculate_semantic_of_point.Match_semantics(point, 90)
        init_rs_staypoint_time(labels, gps_data, timestamp, accur, SP, labDIC, location_file)

    for gps_path in getfullfilepath():
        _label_stay_points(gps_path.replace("locationGPS.txt", "location.txt"), gps_path)

    # Folders of the categorized users.
    from getDir import GetDirName
    import os

    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())

    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + "\\" + "starlog")
    for path_file in other:
        for folder in path_file:
            _label_stay_points(parent_path + folder + os.sep + "location.txt", folder)
Beispiel #21
0
def mian():
    """Detect and label stay points for all known ``location.txt`` files.

    Two sets of files are handled the same way: first the ``location.txt``
    next to every path returned by ``getfullfilepath``, then the
    ``location.txt`` in every folder listed by ``getUserFiles`` under
    ``starlog`` (the folders of the categorized users).

    A file for which ``getStayPoint`` returns no labels is reported and
    skipped; duplicating the last label would raise ``IndexError`` on an
    empty list.
    """
    from Semantics_of_Trajectories import Calculate_semantic_of_point
    from stop_points import getfullfilepath

    def _label_stay_points(location_file, shown_name):
        # Run stay-point detection on one file and store the labeled result.
        gps_data, timestamp, accur = get_data(location_file)
        labels, SP = getStayPoint(gps_data, timestamp, disthreshold=90, timethreshold=180)
        print(shown_name)
        if len(labels) == 0:
            # labels can be empty; there is then no last label to repeat.
            print('no stay-point labels for %s, skipped' % location_file)
            return
        # The last label is repeated once, so labels grows by one element.
        labels.append(labels[-1])
        labDIC = {}
        for index, point in enumerate(SP):
            labDIC[index] = Calculate_semantic_of_point.Match_semantics(point, 90)
        init_rs_staypoint_time(labels, gps_data, timestamp, accur, SP, labDIC, location_file)

    for gps_path in getfullfilepath():
        _label_stay_points(gps_path.replace('locationGPS.txt', 'location.txt'), gps_path)

    # Folders of the categorized users.
    from getDir import GetDirName
    import os
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())

    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + '\\' + 'starlog')
    for path_file in other:
        for folder in path_file:
            _label_stay_points(parent_path + folder + os.sep + 'location.txt', folder)
def main():
    """Run ``Read_RC_stoppoint`` on every known ``RC_stoppoint.txt``.

    First the ``RC_stoppoint.txt`` next to every path returned by
    ``GetSemanticGPSpath`` is processed, then the one in every folder listed
    by ``getUserFiles`` under ``starlog`` (the folders of the categorized
    users). A completion message is printed after each of the two passes.
    """
    from label_add_time import GetSemanticGPSpath
    for semantic_path in GetSemanticGPSpath():
        path_file = semantic_path.replace('semanticGPS.txt', 'RC_stoppoint.txt')
        print(path_file)
        Read_RC_stoppoint(path_file)
    # print() with one argument prints the same on Python 2 and Python 3;
    # the bare print statement is a syntax error on Python 3.
    print('ok.....have process over')

    # Folders of the categorized users.
    from getDir import GetDirName
    import os
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + '\\' + 'starlog')
    for path_file in other:
        for folder in path_file:
            Read_RC_stoppoint(parent_path + folder + os.sep + 'RC_stoppoint.txt')
    print('ok.....have process over')
Beispiel #23
0
def initAprioriitem():
    """Split each day's labels into three time-of-day rows for Apriori.

    For every ``RCed_stoppoint.txt`` found two folder levels below
    ``<parent of cwd>/starlog``, columns 3 (timestamp) and 4 (label) are
    read. Each label is put into one of three buckets depending on
    ``str2timeNum(timestamp)``: ``[0, 480]``, ``(480, 960]`` or
    ``(960, 1440)``. Values outside these ranges are dropped. The three
    buckets are written as three CSV rows to ``appriori_sequence.csv`` in
    the same folder.

    NOTE(review): the thresholds suggest that ``str2timeNum`` returns
    minutes since midnight -- confirm.
    """
    getdir = GetDirName()
    starlog = os.path.dirname(os.getcwd()) + os.sep + "starlog"

    Fulldirlist = []
    for user_dir in getdir.printPath(starlog):
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep +
                               day_dir + os.sep + 'RCed_stoppoint.txt')

    for path in Fulldirlist:
        # ndmin=2 keeps a single-record file as one row; without it loadtxt
        # squeezes the result to 1-D and iteration would yield bare strings.
        data = np.loadtxt(path, dtype=str, delimiter=',', usecols=(3, 4),
                          ndmin=2)
        data_moring = []
        data_noon = []
        data_night = []
        # A row looks like ['2015-07-07 00:00:00', '5_bedroom'].
        for stamp, label in data:
            labtltime = str2timeNum(stamp)
            if 0 <= labtltime <= 8 * 60:
                data_moring.append(label)
            elif 8 * 60 < labtltime <= 16 * 60:
                data_noon.append(label)
            elif 16 * 60 < labtltime < 24 * 60:
                data_night.append(label)

        target = path.replace('RCed_stoppoint.txt', 'appriori_sequence.csv')
        # 'wb' is the csv convention on Python 2, which this file targets.
        # The context manager closes the file even if a row fails to write.
        with open(target, 'wb') as savefile:
            SaveFile = csv.writer(savefile)
            SaveFile.writerow(data_moring)
            SaveFile.writerow(data_noon)
            SaveFile.writerow(data_night)
Beispiel #24
0
def RC_Label_Time_process():
    """Add RC timestamps to every known ``RC_stoppoint.txt``.

    Each file is passed to ``ldaHelper().Add_RCtimestamp`` and the result is
    handed to ``write_labelTime2file`` together with the same path. First
    the files next to the paths from ``GetSemanticGPSpath`` are handled
    (with a progress message each), then those in the folders listed by
    ``getUserFiles`` under ``starlog`` (the categorized users).
    """
    semantic_paths = GetSemanticGPSpath()
    helper = ldaHelper()
    for semantic_path in semantic_paths:
        target = semantic_path.replace('semanticGPS.txt', 'RC_stoppoint.txt')
        stamped = helper.Add_RCtimestamp(target)
        write_labelTime2file(stamped, target)
        print('have done %s' % target)

    # Folders of the categorized users.
    from getDir import GetDirName
    import os
    lister = GetDirName()
    root = os.path.dirname(os.getcwd())
    _user_files, _all_files, groups = lister.getUserFiles(root + '\\' + 'starlog')
    for group in groups:
        for folder in group:
            target = root + folder + os.sep + 'RC_stoppoint.txt'
            stamped = helper.Add_RCtimestamp(target)
            write_labelTime2file(stamped, target)
Beispiel #25
0
def main():
    """Run ``Read_RC_stoppoint`` on every known ``RC_stoppoint.txt``.

    First the ``RC_stoppoint.txt`` next to every path returned by
    ``GetSemanticGPSpath`` is processed, then the one in every folder listed
    by ``getUserFiles`` under ``starlog`` (the folders of the categorized
    users). A completion message is printed after each of the two passes.
    """
    from label_add_time import GetSemanticGPSpath
    for semantic_path in GetSemanticGPSpath():
        path_file = semantic_path.replace('semanticGPS.txt', 'RC_stoppoint.txt')
        print(path_file)
        Read_RC_stoppoint(path_file)
    # print() with one argument prints the same on Python 2 and Python 3;
    # the bare print statement is a syntax error on Python 3.
    print('ok.....have process over')

    # Folders of the categorized users.
    from getDir import GetDirName
    import os
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + '\\' +
                                                        'starlog')
    for path_file in other:
        for folder in path_file:
            Read_RC_stoppoint(parent_path + folder + os.sep + 'RC_stoppoint.txt')
    print('ok.....have process over')
Beispiel #26
0
def getAll_label2dic():
    """Build and pickle a mapping from every label tag to a string index.

    Column 4 of every ``RCed_stoppoint.txt`` found two folder levels below
    ``<parent of cwd>/starlog`` is read and the distinct tags are collected.
    The resulting dictionary (tag -> ``'0'``, ``'1'``, ...) is meant for
    translating between label tags and item ids when working with frequent
    item sets. It is written to ``alllabelDic.pkl`` in the current
    directory.

    :return: ``(allLabelSet, allLabelDic)`` -- the sorted list of distinct
        tags and the tag -> index-string dictionary.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    print(parent_path)
    starlog = parent_path + os.sep + "starlog"
    dirlist = getdir.printPath(starlog)
    print(dirlist)

    # Paths of all per-day label files.
    Fulldirlist = []
    for user_dir in dirlist:
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep +
                               day_dir + os.sep + 'RCed_stoppoint.txt')

    distinct_tags = set()
    for filePath in Fulldirlist:
        # ndmin=1: a file with a single record would otherwise be squeezed
        # to a 0-d array, which cannot be iterated.
        labelTag = np.loadtxt(filePath,
                              dtype=str,
                              delimiter=',',
                              usecols=(4, ),
                              ndmin=1)
        distinct_tags.update(labelTag)

    # Sorted so the index given to each tag does not depend on set order.
    allLabelSet = sorted(distinct_tags)
    allLabelDic = {}
    for index, tag in enumerate(allLabelSet):
        allLabelDic[tag] = str(index)

    with open('alllabelDic.pkl', 'wb') as output:
        pickle.dump(allLabelDic, output)
    return allLabelSet, allLabelDic
def frequence_mod_sim(user1, user2):
    """Sum ``currentFileSim`` over the index-aligned folders of two users.

    The folder lists come from ``printPath``. Folders are paired by position
    from the start of both lists, and only as many pairs as the shorter list
    provides are compared. For each pair the two ``itemfrequence.txt`` files
    are read. ``currentFileSim`` receives, per user, the flat list of all
    comma-separated fields and the list of per-line field lists.

    :param user1: folder name of the first user under ``starlog``.
    :param user2: folder name of the second user under ``starlog``.
    :return: the accumulated similarity, starting from ``0.0``.
    """

    def _read_items(path):
        # Return (all fields flattened, fields grouped per line) of one file.
        flat = []
        per_line = []
        # The context manager closes the file; it used to be left open.
        with open(path) as handle:
            for line in handle:
                fields = line.replace("\n", "").split(',')
                flat.extend(fields)
                per_line.append(fields)
        return flat, per_line

    getdir = GetDirName()
    starlog = os.path.dirname(os.getcwd()) + os.sep + "starlog"
    user1Floder = getdir.printPath(starlog + os.sep + user1)
    user2Floder = getdir.printPath(starlog + os.sep + user2)
    user1filepath = starlog + os.sep + os.sep + user1 + os.sep
    user2filepath = starlog + os.sep + os.sep + user2 + os.sep
    num_floder = min(len(user1Floder), len(user2Floder))

    similary_dgree = 0.0
    for Floderindex in range(num_floder):
        # Each user's itemfrequence data is the input of the similarity.
        t_u1_file = (user1filepath + user1Floder[Floderindex] + os.sep +
                     'itemfrequence.txt')
        t_u2_file = (user2filepath + user2Floder[Floderindex] + os.sep +
                     'itemfrequence.txt')

        listuser1, listinlisetuser1 = _read_items(t_u1_file)
        listuser2, listinlisetuser2 = _read_items(t_u2_file)
        similary_dgree += currentFileSim(listuser1, listuser2,
                                         listinlisetuser1, listinlisetuser2)

    return similary_dgree
Beispiel #28
0
def create_model(labelFileName='RClabelTime.txt', num_topics=24):
    """Train a TF-IDF + LDA model from the per-folder label files and save it.

    One "sentence" is built per label file. Each record contributes the word
    ``<label>_<date>_<time2hour(time)>_<duration // 600>``, taken from
    columns 0 (label), 1 (start time, ``"<date> <time>"``) and 3 (duration).
    The dictionary, TF-IDF model and LDA model are saved under
    ``.\\LDA_all_27\\`` with the topic count in the file name.

    :param labelFileName: name of the label file read from every folder
        listed by ``getUserFiles``.
    :param num_topics: number of LDA topics, also used in the saved file
        names. Defaults to 24, the value that used to be hard-coded.
    :return: ``(lda, dic, tfidf, train_set, AllUserFiles)``.
    """
    from getDir import GetDirName
    import os
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    AllUserFiles, AllFiles, other = getdir.getUserFiles(parent_path + '\\' +
                                                        'starlog')

    FileName = []
    for path_file in other:
        for folder in path_file:
            FileName.append(parent_path + folder + os.sep + labelFileName)

    train_set = []
    # Each file is the data of one day; one pass of this loop handles one day.
    for day_file in FileName:
        # ndmin=2 keeps a single-record file as one row; without it loadtxt
        # squeezes the result to 1-D and row indexing would pick characters.
        ondaydata = np.loadtxt(day_file,
                               dtype=str,
                               delimiter=',',
                               usecols=(0, 1, 3),
                               ndmin=2)
        tempsentence = []
        for label, start, duration in ondaydata:
            parts = start.split(' ')
            word = (label + '_' + parts[0] + '_' + time2hour(parts[1]) +
                    '_' + str(int(duration) // (10 * 60)))
            tempsentence.append(word)
        train_set.append(tempsentence)

    dic = corpora.Dictionary(train_set)
    corpus = [dic.doc2bow(text) for text in train_set]
    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]
    lda = models.LdaModel(corpus_tfidf, id2word=dic, num_topics=num_topics)
    lda.save(".\\LDA_all_27\\SemanticLda" + str(num_topics) + ".txt")
    dic.save(".\\LDA_all_27\\SemanticDic" + str(num_topics) + ".txt")
    tfidf.save(".\\LDA_all_27\\SemanticTFIDF" + str(num_topics) + ".txt")

    return lda, dic, tfidf, train_set, AllUserFiles
def collect_all2one():
    """Merge each user's daily sequence patterns into one file and analyse it.

    Every ``sequence_pattern.txt`` found two folder levels below
    ``<parent of cwd>/starlog`` is appended to ``allsequencepattern.txt``
    in the folder two levels above it (the user's folder). The distinct
    merged files are then passed to ``detect_Sequencepattern`` with a
    threshold of ``0.01``.

    NOTE(review): the merged file is opened in append mode, so running this
    again adds the same lines a second time -- confirm that callers clear
    the file first.
    """
    getdir = GetDirName()
    starlog = os.path.dirname(os.getcwd()) + os.sep + "starlog"

    Fulldirlist = []
    for user_dir in getdir.printPath(starlog):
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep +
                               day_dir + os.sep + 'sequence_pattern.txt')

    resultPath = []
    for pattern_file in Fulldirlist:
        # Dropping the last two path components gives the user's folder.
        user_folder = pattern_file.rsplit(os.sep, 2)[0]
        temppath = user_folder + os.sep + 'allsequencepattern.txt'
        resultPath.append(temppath)
        # Both handles are closed by the context managers, also on errors.
        with open(temppath, 'a') as output:
            with open(pattern_file, 'r') as day_file:
                for line in day_file:
                    output.write(line)

    resultFloder = list(set(resultPath))
    detect_Sequencepattern(resultFloder, 0.01)
Beispiel #30
0
def getAprioriItem(minSupport=0.5,
                   minConfidence=0.8):
    """Run Apriori on every ``appriori_sequence.csv`` and save the item sets.

    The files are looked up two folder levels below
    ``<parent of cwd>/starlog``. For each of them, the item sets returned by
    ``runApriori`` are sorted by ascending support, and every set with more
    than one element is written as one comma-separated line to
    ``sequencefrequence.txt`` in the same folder.

    :param minSupport: minimum support passed to ``runApriori``.
    :param minConfidence: minimum confidence passed to ``runApriori``.
    """
    getdir = GetDirName()
    starlog = os.path.dirname(os.getcwd()) + os.sep + "starlog"

    Fulldirlist = []
    for user_dir in getdir.printPath(starlog):
        for day_dir in getdir.printPath(starlog + os.sep + user_dir):
            Fulldirlist.append(starlog + os.sep + user_dir + os.sep +
                               day_dir + os.sep + 'appriori_sequence.csv')

    for infile in Fulldirlist:  # infile is the path of appriori_sequence.csv
        inFile = dataFromFile(infile)
        items, rules = runApriori(inFile, minSupport, minConfidence)
        out_path = infile.replace('appriori_sequence.csv',
                                  'sequencefrequence.txt')
        # The context manager closes (and flushes) the result file, which
        # was previously left open.
        with open(out_path, 'w') as savefile:
            # pair is (item, support); indexing replaces the tuple-parameter
            # lambda, which is a syntax error on Python 3.
            for item, support in sorted(items, key=lambda pair: pair[1]):
                seitem = list(item)
                if len(seitem) > 1:
                    savefile.write(','.join(seitem))
                    savefile.write('\n')
def calculate_bluetooth_sim(user1, user2):
    """Collect the existing ``Processed_bluetooth.txt`` files of two users.

    Every sub-folder that ``GetDirName().printPath`` reports under
    ``<parent of cwd>/starlog/<user>`` is probed for a
    ``Processed_bluetooth.txt`` file; the paths that exist are gathered
    per user.

    NOTE(review): this block neither computes nor returns a similarity
    (it implicitly returns ``None``); the gathered lists are presumably
    input for a comparison step that is not part of this block -- confirm.

    :param user1: folder name of the first user under ``starlog``.
    :param user2: folder name of the second user under ``starlog``.
    """
    UserFile1 = []
    UserFile2 = []
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog = parent_path + os.sep + "starlog"
    user1Floder = getdir.printPath(starlog + os.sep + user1)
    user2Floder = getdir.printPath(starlog + os.sep + user2)
    user1filepath = starlog + os.sep + os.sep + user1 + os.sep
    user2filepath = starlog + os.sep + os.sep + user2 + os.sep
    try:
        for folder in user1Floder:
            u1file = user1filepath + folder + os.sep + 'Processed_bluetooth.txt'
            if os.path.exists(u1file):
                UserFile1.append(u1file)

        # Walk user2's own folder list. Bounding this loop by the length of
        # user1's list either raised IndexError (user2 has fewer folders)
        # or silently ignored user2's extra folders.
        for folder in user2Floder:
            u2file = user2filepath + folder + os.sep + 'Processed_bluetooth.txt'
            if os.path.exists(u2file):
                UserFile2.append(u2file)
    except Exception as e:
        # Best-effort: report the problem and carry on, as before.
        print(e)
        traceback.print_exc()
def calculate_user_sim_onBluetooth():
    """Write a Bluetooth similarity line for pairs of users that have logs.

    Directories returned in the third element of ``getUserFiles`` are probed
    for a ``bluetooth.txt`` file; for each hit the third-from-last path
    component is recorded (presumably the user name - confirm against the
    directory layout).  For the de-duplicated names, every pair produced by
    the loops below is passed to ``calculate_bluetooth_sim`` and a line
    ``name1,name2,result`` is written to ``ans``.

    ``ans`` is not defined in this function; it must be a writable object
    available in the enclosing scope.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    AllUserFiles, AllFiles, other = getdir.getUserFiles(
        parent_path + os.sep + 'starlog')

    found_names = []
    for folder_group in other:
        for relative_dir in folder_group:
            candidate = parent_path + relative_dir + os.sep + 'bluetooth.txt'
            if not os.path.exists(candidate):
                continue
            found_names.append(candidate.split(os.sep)[-3])

    userlist = list(set(found_names))
    print(userlist)
    # NOTE(review): the inner loop starts at i, so each name except the last
    # is also paired with itself - confirm whether that is intended.
    for position in range(len(userlist) - 1):
        first = userlist[position]
        for second in userlist[position:]:
            print("%s %s" % (first, second))
            result = calculate_bluetooth_sim(first, second)
            for piece in (first, ',', second, ',', str(result), '\n'):
                ans.write(piece)
def initAprioriitem():
    """Build the per-folder ``appriori_sequence.csv`` input files.

    For every ``<parent>/starlog/<dir>/<subdir>/RCed_stoppoint.txt`` (where
    ``<parent>`` is the parent of the current working directory), columns 3
    and 4 are read as a (time string, label) pair, e.g.
    ``['2015-07-07 00:00:00', '5_bedroom']``.  Labels are split into three
    buckets according to ``str2timeNum(time)`` and written as three CSV rows
    (first, second and third bucket) next to the input file.

    NOTE(review): the thresholds ``8*60`` / ``16*60`` / ``24*60`` suggest that
    ``str2timeNum`` returns minutes since midnight, although an earlier
    comment mentioned 30-minute slices - confirm.  Values outside
    ``[0, 24*60)`` are dropped.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog_root = parent_path + os.sep + "starlog"

    Fulldirlist = []
    for user_dir in getdir.printPath(starlog_root):
        for day_dir in getdir.printPath(starlog_root + os.sep + user_dir):
            Fulldirlist.append(starlog_root + os.sep + user_dir + os.sep +
                               day_dir + os.sep + 'RCed_stoppoint.txt')

    for path in Fulldirlist:
        # ndmin=2 keeps a file with a single data row as one (time, label)
        # row; without it loadtxt returns a 1-D array and the loop below
        # would iterate over the two strings instead of over rows.
        data = np.loadtxt(path, dtype=str, delimiter=',', usecols=(3, 4),
                          ndmin=2)
        data_moring = []
        data_noon = []
        data_night = []
        for item in data:
            labtltime = str2timeNum(item[0])
            if 0 <= labtltime <= 8 * 60:
                data_moring.append(item[1])
            elif 8 * 60 < labtltime <= 16 * 60:
                data_noon.append(item[1])
            elif 16 * 60 < labtltime < 24 * 60:
                data_night.append(item[1])

        # 'wb' is the mode the Python 2 csv module expects; the with-block
        # guarantees the file is closed even if a write fails.
        with open(path.replace('RCed_stoppoint.txt', 'appriori_sequence.csv'),
                  'wb') as savefile:
            SaveFile = csv.writer(savefile)
            SaveFile.writerow(data_moring)
            SaveFile.writerow(data_noon)
            SaveFile.writerow(data_night)
def _read_itemfrequence(path):
    """Return ``(flat_items, rows)`` parsed from a comma-separated text file.

    ``rows`` holds one list of fields per line; ``flat_items`` is the
    concatenation of all rows.  The file is closed before returning.
    """
    flat_items = []
    rows = []
    with open(path) as handle:
        for line in handle:
            fields = line.replace("\n", "").split(",")
            flat_items.extend(fields)
            rows.append(fields)
    return flat_items, rows


def frequence_mod_sim(user1, user2):
    """Sum ``currentFileSim`` over the two users' ``itemfrequence.txt`` files.

    The sub-folders of ``<parent>/starlog/<user1>`` and
    ``<parent>/starlog/<user2>`` (as listed by ``GetDirName.printPath``) are
    paired by position, up to the length of the shorter list.  For every pair
    both ``itemfrequence.txt`` files are parsed and handed to
    ``currentFileSim``; the results are accumulated.

    :param user1: folder name of the first user below ``starlog``.
    :param user2: folder name of the second user below ``starlog``.
    :return: ``0.0`` plus the sum of the per-pair ``currentFileSim`` results.
    :raises IOError: if an ``itemfrequence.txt`` file of a paired folder
        cannot be opened.
    """
    getdir = GetDirName()
    parent_path = os.path.dirname(os.getcwd())
    starlog_root = parent_path + os.sep + "starlog" + os.sep
    user1Floder = getdir.printPath(starlog_root + user1)
    user2Floder = getdir.printPath(starlog_root + user2)
    # The doubled separator after "starlog" is kept from the original layout.
    user1filepath = starlog_root + os.sep + user1 + os.sep
    user2filepath = starlog_root + os.sep + user2 + os.sep

    similary_dgree = 0.0
    # zip() stops at the shorter list, i.e. folders are aligned from the
    # start.  (An earlier, disabled variant aligned the lists from the end.)
    for folder1, folder2 in zip(user1Floder, user2Floder):
        t_u1_file = user1filepath + folder1 + os.sep + "itemfrequence.txt"
        t_u2_file = user2filepath + folder2 + os.sep + "itemfrequence.txt"

        listuser1, listinlisetuser1 = _read_itemfrequence(t_u1_file)
        listuser2, listinlisetuser2 = _read_itemfrequence(t_u2_file)
        similary_dgree += currentFileSim(listuser1, listuser2,
                                         listinlisetuser1, listinlisetuser2)

    return similary_dgree
        file_processGPS.writelines(',')
        file_processGPS.writelines('0.0')
        file_processGPS.write('\n')
    file_processGPS.close()


def drewgps(weidu, jindu, date):
    """Save a red-dot scatter plot of one set of GPS points as a PNG.

    ``jindu`` is used for the x axis and ``weidu`` for the y axis
    (presumably longitude and latitude - confirm).  The image is written to
    ``.\\GPS_pic\\<date>.png`` at 800 dpi and the figure is closed
    afterwards; nothing is shown interactively.

    :param weidu: y values of the points.
    :param jindu: x values of the points.
    :param date: string used in the plot title and as the file name stem.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.scatter(jindu, weidu, c='r', marker='.')

    heading = 'location of : %s' % date
    plt.title(heading)

    target = '.\\GPS_pic\\' + date + '.png'
    plt.savefig(target, dpi=800)
    plt.close()


if __name__ == '__main__':
    # Run readfile() on the location.txt of every folder listed under
    # .\GPS_Get_PreProcesser.
    getdir = GetDirName()
    gps_root = ".\\GPS_Get_PreProcesser"
    dirlist = getdir.printPath(gps_root)
    print(dirlist)
    for wenjianjia in dirlist:
        location_file = gps_root + '\\' + wenjianjia + '\\' + 'location.txt'
        print("%s %s" % (location_file, wenjianjia))
        readfile(location_file, wenjianjia)
    # Next, write out the results separately for each user.
    AllUserFiles, AllFiles, other = getdir.getUserFiles()
    for wenjianjia in AllFiles:
        user_location_file = wenjianjia + os.sep + 'location.txt'
        readfile(user_location_file, wenjianjia)
def getFolderNum(username):
    """Print the listing ``GetDirName.printPath`` returns for ``username``.

    Despite the name, nothing is counted or returned; the listing is only
    printed.

    :param username: path handed to ``printPath`` unchanged.
    """
    folder_listing = GetDirName().printPath(username)
    print(folder_listing)
        file_processGPS.writelines(',')
        file_processGPS.writelines(i[1])
        file_processGPS.writelines(',')
        file_processGPS.writelines(i[2])
        file_processGPS.writelines(',')
        file_processGPS.writelines(i[3])
        file_processGPS.writelines(',')
        file_processGPS.writelines('0.0')
        file_processGPS.write('\n')
    file_processGPS.close()
def drewgps(weidu, jindu, date):
    """Save a red-dot scatter plot of one set of GPS points as a PNG.

    ``jindu`` is used for the x axis and ``weidu`` for the y axis
    (presumably longitude and latitude - confirm).  The image is written to
    ``.\\GPS_pic\\<date>.png`` at 800 dpi and the figure is closed
    afterwards; nothing is shown interactively.

    :param weidu: y values of the points.
    :param jindu: x values of the points.
    :param date: string used in the plot title and as the file name stem.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.scatter(jindu, weidu, c='r', marker='.')

    heading = 'location of : %s' % date
    plt.title(heading)

    target = '.\\GPS_pic\\' + date + '.png'
    plt.savefig(target, dpi=800)
    plt.close()
if __name__ == '__main__':
    # Run readfile() on the location.txt of every folder listed under
    # .\GPS_Get_PreProcesser.
    getdir = GetDirName()
    gps_root = ".\\GPS_Get_PreProcesser"
    dirlist = getdir.printPath(gps_root)
    print(dirlist)
    for wenjianjia in dirlist:
        location_file = gps_root + '\\' + wenjianjia + '\\' + 'location.txt'
        print("%s %s" % (location_file, wenjianjia))
        readfile(location_file, wenjianjia)
    # Next, write out the results separately for each user.
    AllUserFiles, AllFiles, other = getdir.getUserFiles()
    for wenjianjia in AllFiles:
        user_location_file = wenjianjia + os.sep + 'location.txt'
        readfile(user_location_file, wenjianjia)
# Example #38
# 0
            writeans([temppath_index,tt,re])
            print('-----------------------我是分割线-----------------------------')

def writeans(out):
    """Append the first three items of ``out`` as one CSV line to network.txt.

    Each item is converted with ``str()`` and the items are joined with
    commas, followed by a newline.  The line is built before the file is
    opened, so a too-short ``out`` no longer leaves a partial line behind,
    and the with-block closes the file even if the write fails.

    :param out: indexable with at least three items.
    :raises IndexError: if ``out`` has fewer than three items.
    """
    line = ','.join([str(out[0]), str(out[1]), str(out[2])]) + '\n'
    with open('network.txt', 'a+') as output:
        output.write(line)

if __name__ == '__main__':
    # Build and print the RCed_stoppoint.txt path of every folder listed
    # under .\GPS_Get_PreProcesser.
    getdir = GetDirName()
    gps_root = ".\\GPS_Get_PreProcesser"
    dirlist = getdir.printPath(gps_root)
    Fulldirlist = [
        gps_root + "\\" + day_dir + "\\" + 'RCed_stoppoint.txt'
        for day_dir in dirlist
    ]
    print(Fulldirlist)

    '''
    fullpath=stop_points.getfullfilepath()

    tra1,tra2=Get_Prime_GpsData(".\\GPS_Get_PreProcesser\\10-21-2015\\locationGPS.txt",".\\GPS_Get_PreProcesser\\10-20-2015\\locationGPS.txt")
    testdtw(tra1,tra2)

    tra3,tra4=Get_Prime_GpsData(".\\GPS_Get_PreProcesser\\7-1-2015\\locationGPS.txt",".\\GPS_Get_PreProcesser\\10-20-2015\\locationGPS.txt")
    testdtw(tra3,tra4)
    '''
    #tra1,tra2=Get_Prime_GpsData(".\\GPS_Get_PreProcesser\\10-01-2015\\locationGPS.txt",".\\GPS_Get_PreProcesser\\10-02-2015\\locationGPS.txt")
def getFolderNum(username):
    """Print the listing ``GetDirName.printPath`` returns for ``username``.

    Despite the name, nothing is counted or returned; the listing is only
    printed.

    :param username: path handed to ``printPath`` unchanged.
    """
    folder_listing = GetDirName().printPath(username)
    print(folder_listing)
# Example #40
# 0
                savefile.write(seitem[len(seitem) - 1])
                savefile.write('\n')

        #savefile=open(infile.replace('appriori_sequence.csv','sequencefrequence.txt'),'w')
        # for subitem in
        # savefile.write(str(sequence_Frequence))
        # savefile.close()
    #return sequence_Frequence


# Script entry point: collect the RCed_stoppoint.txt path of every
# <parent>/starlog/<dir>/<subdir> folder, where <parent> is the parent of the
# current working directory.
if __name__ == '__main__':
    #initAprioriitem()
    #
    # print getAll_label2dic()

    getdir = GetDirName()
    Fulldirlist = []
    parent_path = os.path.dirname(os.getcwd())
    #print(parent_path)
    # First level below starlog, as listed by printPath.
    dirlist = getdir.printPath(parent_path + os.sep + "starlog")
    #print(dirlist)
    for dir in dirlist:
        #print(dir)
        # Second level: the entries inside each first-level folder.
        seconddir = (getdir.printPath(parent_path + os.sep + "starlog" +
                                      os.sep + dir))
        for secdir in seconddir:
            Fulldirlist.append(parent_path + os.sep + "starlog" + os.sep +
                               dir + os.sep + secdir + os.sep +
                               'RCed_stoppoint.txt')

    # NOTE(review): the body of the following loop is missing at this point
    # in the file; as written the statement is incomplete.
    for i in Fulldirlist:









if __name__=='__main__':
     #initAprioriitem()
    #
    # print getAll_label2dic()

    getdir=GetDirName()
    Fulldirlist=[]
    parent_path = os.path.dirname(os.getcwd())
    #print(parent_path)
    dirlist=getdir.printPath(parent_path+os.sep+"starlog")
    #print(dirlist)
    for dir in dirlist:
        #print(dir)
        seconddir=(getdir.printPath(parent_path+os.sep+"starlog"+os.sep+dir))
        for secdir in seconddir:
            Fulldirlist.append(parent_path+os.sep+"starlog"+os.sep+dir+os.sep+secdir+os.sep+'RCed_stoppoint.txt')


    for i in Fulldirlist:
        print(i)
        dic_labelTag,frequentSet= getFrequentItem(i)