def iterateFile(self, file):
    """Walk a trajectory file and feed every movement segment to the result.

    Each line is a comma-separated record. Column 2 is an integer
    timestamp, columns 3 and 4 are latitude and longitude, and columns
    5, 6, 8 and 9 together identify the trip the point belongs to.
    Consecutive points of the same trip form a segment; its grid ids,
    speed and direction are handed to ``self.updateResByLine``.

    :param file: path of the input file (the name shadows the Python 2
        builtin, kept for interface compatibility)
    """
    print("Delta for current running %f" % self.delta)

    total = 0
    activeTrip = None  # no trip seen yet; real trip keys are always strings
    prevLat = prevLng = prevTime = -1

    with open(file, 'rb') as handle:
        for rawLine in handle:
            total += 1
            fields = rawLine.strip('\n').split(',')

            # Trip identifier
            tripKey = "%s-%s-%s-%s" % (fields[5], fields[6], fields[8],
                                       fields[9])
            curLat = fields[3]
            curLng = fields[4]
            curTime = int(fields[2])

            if tripKey == activeTrip:
                # Same trip: skip points that did not move or share the
                # timestamp of the segment start (avoids dividing by zero).
                # The segment start is intentionally left untouched here.
                unmoved = prevLat == curLat and prevLng == curLng
                if unmoved or prevTime == curTime:
                    continue

                startPoint = [float(prevLng), float(prevLat)]
                endPoint = [float(curLng), float(curLat)]
                startGid = getFormatGID(startPoint)['gid']
                endGid = getFormatGID(endPoint)['gid']
                travelled = getRealDistance(prevLng, prevLat, curLng, curLat)
                velocity = travelled / (curTime - prevTime)
                # One of the four characters w / n / s / e
                heading = getDirection(startPoint, endPoint)

                self.updateResByLine(startPoint, endPoint, startGid, endGid,
                                     heading, velocity)
            else:
                # First line of the file or first point of a new trip
                activeTrip = tripKey

            prevLat, prevLng, prevTime = curLat, curLng, curTime

    print("Total %d records in this file." % (total))
def main():
    """Count "src" and "dst" records per grid cell for every hourly file.

    For each index in ``startIndex..endIndex`` the file
    ``readPath + "traveldata-<index>"`` is read and a result file
    ``writePath + "<index % 24>-<index>"`` is written with one
    ``gid,from_count,to_count`` line for every grid id from 0 up to the
    largest id seen in that file. Counters and the global ``MAX_INDEX``
    are reset after each file.

    Input files are streamed line by line and closed deterministically.
    Blank lines are skipped, and a final record without a trailing
    newline is counted instead of being silently dropped.
    """
    global MAX_INDEX
    start_time = time.time()

    from_array = [0] * 10000000
    to_array = [0] * 10000000

    # Extra arguments handed to getFormatGID; presumably the lng/lat cell
    # sizes and the bounding box of the study area -- TODO confirm against
    # getFormatGID's signature.
    grid_bounds = {
        'north': 40.2500,
        'south': 38.5667,
        'west': 116.7167,
        'east': 118.3233,
    }

    for file_index in range(startIndex, endIndex + 1):
        filename_r = "traveldata-" + str(file_index)
        filename_w = str(file_index % 24) + "-" + str(file_index)

        with open(readPath + filename_r, 'r') as source:
            for record in source:
                record = record.rstrip('\r\n')
                if not record:
                    continue
                columns = record.split(",")
                gridId = getFormatGID([columns[4], columns[3]], 0.0064,
                                      0.005, grid_bounds)["gid"]
                if gridId > MAX_INDEX:
                    MAX_INDEX = gridId

                if columns[5] == "src":
                    from_array[gridId] += 1
                elif columns[5] == "dst":
                    to_array[gridId] += 1

        with open(writePath + filename_w, 'w') as resultFile:
            for i in range(MAX_INDEX + 1):
                resultFile.write(
                    str(i) + "," + str(from_array[i]) + "," +
                    str(to_array[i]) + "\n")
                # Reset as we go so the arrays can be reused for the next file
                from_array[i] = 0
                to_array[i] = 0
        MAX_INDEX = 0

    print("Complete Time:" + str(time.time() - start_time))
def main():
    """Count de-duplicated "src" and "dst" records per grid cell.

    Every file ``readPath + "traveldata-<index>"`` for ``index`` in
    ``startIndex..endIndex`` is scanned. A record is counted only the
    first time its ``columns[1]-columns[2]`` key appears for its category
    ("src" or "dst"); keys are remembered across all files. The combined
    counts are written once to ``writePath + "4175-4175"`` as
    ``gid,from_count,to_count`` lines, after which the counters and the
    global ``MAX_INDEX`` are reset.

    Input files are streamed line by line and closed deterministically.
    Blank lines are skipped, and a final record without a trailing
    newline is counted instead of being silently dropped.
    """
    global MAX_INDEX
    start_time = time.time()

    from_array = [0] * 10000000
    to_array = [0] * 10000000
    seen_src = set()
    seen_dst = set()

    for file_index in range(startIndex, endIndex + 1):
        filename_r = "traveldata-" + str(file_index)

        with open(readPath + filename_r, 'r') as source:
            for record in source:
                record = record.rstrip('\r\n')
                if not record:
                    continue
                columns = record.split(",")
                gridId = getFormatGID([columns[4], columns[3]])["gid"]
                key = str(columns[1]) + '-' + str(columns[2])

                # The largest grid id is tracked even for duplicate records
                if gridId > MAX_INDEX:
                    MAX_INDEX = gridId

                if columns[5] == "src":
                    if key not in seen_src:
                        seen_src.add(key)
                        from_array[gridId] += 1
                elif columns[5] == "dst":
                    if key not in seen_dst:
                        seen_dst.add(key)
                        to_array[gridId] += 1

    # NOTE(review): the output name is hard-coded; 4175 is presumably a
    # sentinel index meaning "all files aggregated" -- confirm with readers
    # of this output.
    filename_w = str(4175) + "-" + str(4175)
    with open(writePath + filename_w, 'w') as resultFile:
        for i in range(MAX_INDEX + 1):
            resultFile.write(
                str(i) + "," + str(from_array[i]) + "," + str(to_array[i]) +
                "\n")
            from_array[i] = 0
            to_array[i] = 0
    MAX_INDEX = 0

    print("Complete Time:" + str(time.time() - start_time))
def iterateFileNew(self, file):
    """Walk a three-point trajectory file and record segment directions.

    Each comma-separated line carries a device id (column 1) and three
    points, which form two segments:

    * left:  (lng 5, lat 6, time 7)  ->  (lat 3, lng 4, time 2)
    * right: (lat 3, lng 4, time 2)  ->  (lng 8, lat 9, time 10)

    Presumably these are the previous, current and next point of the
    device -- confirm against the file producer. For every device, only
    the first segment leaving a given start position is passed to
    ``self.updateResByLine``.

    :param file: path of the input file (the name shadows the Python 2
        builtin, kept for interface compatibility)
    """
    print("Delta for current running %f" % self.delta)

    count = 0
    # device id -> set of "lat-lng" start positions already recorded
    recordedByDevice = {}

    with open(file, 'rb') as f:
        for line in f:
            count += 1
            linelist = line.strip('\n').split(',')
            recorded = recordedByDevice.setdefault(linelist[1], set())

            # left segment
            fromLat = linelist[6]
            fromLng = linelist[5]
            fromTime = int(linelist[7])
            midLat = linelist[3]
            midLng = linelist[4]
            midTime = int(linelist[2])
            if not self._recordSegment(recorded, fromLng, fromLat, fromTime,
                                       midLng, midLat, midTime):
                # NOTE(review): a degenerate left segment also skips the
                # right segment of this line. Kept as in the original;
                # confirm this is intended.
                continue

            # right segment
            toLat = linelist[9]
            toLng = linelist[8]
            toTime = int(linelist[10])
            self._recordSegment(recorded, midLng, midLat, midTime, toLng,
                                toLat, toTime)

    print("Total %d records in this file." % (count))


def _recordSegment(self, recorded, fromLng, fromLat, fromTime, toLng, toLat,
                   toTime):
    """Record one segment unless it is degenerate or its start was seen.

    :param recorded: set of "lat-lng" start positions already recorded
        for the current device; updated in place
    :param fromLng, fromLat, toLng, toLat: coordinates as read from the
        file (strings)
    :param fromTime, toTime: integer timestamps
    :return: False when the segment is degenerate (same position or same
        timestamp), True otherwise
    """
    if (fromLat == toLat and fromLng == toLng) or fromTime == toTime:
        return False

    position = fromLat + '-' + fromLng
    if position in recorded:
        # Direction for this start position is already known; skip the
        # grid and distance computations entirely.
        return True
    recorded.add(position)

    fPoint = [float(fromLng), float(fromLat)]
    tPoint = [float(toLng), float(toLat)]
    fromGid = getFormatGID(fPoint, self.LngSPLIT, self.LatSPLIT,
                           self.locs)['gid']
    toGid = getFormatGID(tPoint, self.LngSPLIT, self.LatSPLIT,
                         self.locs)['gid']
    distance = getRealDistance(fromLng, fromLat, toLng, toLat)
    speed = distance / (toTime - fromTime)
    # One of the four characters w / n / s / e
    direction = getDirection(fPoint, tPoint)

    self.updateResByLine(fPoint, tPoint, fromGid, toGid, direction, speed)
    return True
# Input / output names are derived from the hour index.
ifilename = "traveldata-" + str(hourId)
ofilename = str(hourId % 24) + "-" + str(hourId)
ifile = os.path.join(INPUT_PATH, ifilename)
res = [0] * 104000

# Scan the input: res[gid] ends up as the number of distinct device ids
# seen in grid cell gid.
with open(ifile, 'rb') as f:
    devIdDict = {}
    for line in f:
        linelist = line.split(',')
        lat = linelist[3]
        lng = linelist[4]
        gid = getFormatGID([lng, lat])['gid']
        devId = int(linelist[1])

        # A device is counted only the first time it shows up in a cell.
        if devId not in devIdDict.setdefault(gid, []):
            devIdDict[gid].append(devId)
            res[gid] += 1

# Prepare the output: one empty list per grid cell plus the target path.
preline = [[] for j in range(len(res))]
ofile = os.path.join(OUTPUT_PATH, ofilename)
# -*- coding: utf-8 -*-
__author__ = 'lenovo'

from util.tripFlow.base import getFormatGID, getDirection
from util.tripFlow.extractGridEdges import ExtractGridEdges

# Manual check: push one hand-made segment through
# ExtractGridEdges.updateResByLine and print what ends up in the
# 'from' and 'to' categories.

# End points of the sample segment.
tPoint = [161.12, 40.35]
fPoint = [161.57, 39.79]
speed = 2.3

PROP = {
    'index': 9,
    'delta': -1,
    'IDIRECTORY': '/datahouse',
    'ODIRECTORY': '/datahouse',
    'suffix': 1,
}

task = ExtractGridEdges(PROP)

direction = getDirection(fPoint, tPoint)
fromGid = getFormatGID(fPoint)['gid']
toGid = getFormatGID(tPoint)['gid']
print(direction)

task.updateResByLine(fPoint, tPoint, fromGid, toGid, direction, speed)

# Shows that the 'from' and 'to' entries carry the same direction.
print(task.resByCate['from'])
print(task.resByCate['to'])
# point4 = [116.69815063476564, 39.78980820192016] # point5 = [116.45507812500001,39.91097066634995] # point6 = [116.28890991210939,39.96204017008559] # point7 = [116.38092041015626,39.86407956071788] # point8 = [115.97442626953126,40.48109956299565] # point9 = [ 116.33525848388673,39.953265311384946] # point10 = [116.43379211425783,39.977999795258164] # point11 = [116.60828590393068,39.975775866942584] # point12 = [116.60828590393068,39.974970137530484] # point13 =[116.55749559402467, 39.94529952809171] # point14 = [116.3978409538867,39.90590128660034] point15 = [116.4556, 39.9095] #points = [point1, point2, point3, point4, point5, point6, point7, point8, point9, point10] points = [point15] gridIds = [getFormatGID(point)['gid'] for point in points] #gridIds = [i for i in range(104000)] #print(gridIds) hourIds = [12] #hourIds = [i for i in range(24)] startIndex = 167 endIndex = 2088 readPath = '/datahouse/tripflow/Anomaly/bj-byhour-statics/' #stay_arr = [[[] for j in range(len(points))] for i in range(len(hourIds))] travel_arr = [[[] for j in range(len(gridIds))] for i in range(len(hourIds))] #total_arr = [[[] for j in range(len(points))] for i in range(len(hourIds))] #travel_arr = [0 for j in range(len(gridIds))]
def getNextGIDs(self, point, direction):
    """
    Collect the first N grid-line crossings of a ray and the grid cells
    they fall in, where N is ``self.custom_params['jump_length']``.

    :param self:
    :param point: [lng, lat, gid] of the ray origin
    :param direction: [x, y] direction components (x along longitude,
        y along latitude)
    :return: dict with 'res', a list of [lng, lat, gid] entries (one per
        kept crossing, nudged slightly into the cell), and 'endPoints',
        the [lng, lat] selected by the minimum search below
    :raises IndexError: when both x and y are 0, because no crossing is
        generated and ``jumpPoints[0]`` does not exist
    """
    res = []
    [x, y] = direction
    fromLat = point[1]
    fromLng = point[0]
    fromGID = point[2]
    # Centre of the origin cell, as reported by parseFormatGID
    parsedObj = parseFormatGID(fromGID)
    baseLatCenter = parsedObj['lat']
    baseLngCenter = parsedObj['lng']
    # Step sign along each axis; a zero component yields -1
    latDir = 1 if y > 0 else -1
    lngDir = 1 if x > 0 else -1

    # Holds every crossing as [lng, lat, lngStep, latStep]
    jumpPoints = []

    # Compute the crossings with the grid cell edges
    if y != 0:
        # Crossings with lines of constant latitude
        # NOTE(review): under Python 2 this is floor division when x and y
        # are both ints -- confirm callers pass floats
        k = x / y
        for i in xrange(0, self.custom_params['jump_length']):
            # Latitude offset from the origin to the i-th edge ahead
            incrementLat = baseLatCenter + self.custom_params[
                'LatSPLIT'] * (0.5 + i) * latDir - fromLat
            iLng = fromLng + incrementLat * k
            iLat = incrementLat + fromLat
            # key = '0,%d' % (i)
            jumpPoints.append([iLng, iLat, 0, i])

    if x != 0:
        # Crossings with lines of constant longitude
        k = y / x
        for i in xrange(0, self.custom_params['jump_length']):
            # Longitude offset from the origin to the i-th edge ahead
            incrementLng = baseLngCenter + self.custom_params[
                'LngSPLIT'] * (0.5 + i) * lngDir - fromLng
            iLat = fromLat + incrementLng * k
            iLng = incrementLng + fromLng
            # key = '%d,0' % (i)
            jumpPoints.append([iLng, iLat, i, 0])

    if x != 0 and y != 0:
        # Both families exist: fold the second family (indices N..2N-1) into
        # the first N slots. Each candidate replaces the slot whose longitude
        # lies furthest along lngDir, provided that slot is further along
        # than the candidate itself. (The original comment described this as
        # sorting by latitude, but the comparison uses the longitude.)
        for i in xrange(self.custom_params['jump_length'],
                        self.custom_params['jump_length'] * 2):
            currentIndex = -1
            currentMax = jumpPoints[i][0]
            for jumpIndex in xrange(0, self.custom_params['jump_length']):
                if (jumpPoints[jumpIndex][0] * lngDir) > (currentMax *
                                                          lngDir):
                    currentIndex = jumpIndex
                    currentMax = jumpPoints[jumpIndex][0]

            if currentIndex != -1:
                jumpPoints[currentIndex] = jumpPoints[i][:]

    # Pick, among the first N crossings, the one with the smallest
    # longitude measured along lngDir
    minLng = jumpPoints[0][0]
    minLat = jumpPoints[0][1]
    originGid = 0  # unused; only referenced by the disabled code below
    if x != 0 and y != 0:
        for i in xrange(1, self.custom_params['jump_length']):
            if minLng * lngDir > jumpPoints[i][0] * lngDir:
                minLng = jumpPoints[i][0]
                minLat = jumpPoints[i][1]

    # Keep only the first N crossings
    updateOriginGid = False  # unused; only referenced by the disabled code
    for i in xrange(0, self.custom_params['jump_length']):
        ilng = jumpPoints[i][0]
        ilat = jumpPoints[i][1]
        # if ilng == minLng and ilat == minLat:
        #     updateOriginGid = True
        # 0.002 is a small offset that pushes the crossing into the cell
        # ahead instead of leaving it exactly on the edge.
        # NOTE(review): the first longitude-line crossing is stored with
        # jumpPoints[i][2] == 0 as well, so it takes the latitude nudge --
        # confirm this is intended.
        if jumpPoints[i][2] == 0:
            ilat += 0.002 * latDir
        else:
            ilng += 0.002 * lngDir

        point = getFormatGID([ilng, ilat])
        gid = point['gid']
        # if updateOriginGid:
        #     updateOriginGid = False
        #     originGid = gid
        res.append([ilng, ilat, gid])

    return {
        'res': res,
        'endPoints': [minLng, minLat]
        # 'originGid': originGid
    }
def iterateFile(self, ifile):
    """
    Read a record file, group the records by grid cell and direction
    category, keep the strongest records per cell and hand them to
    ``self.pickUpSeeds``.

    Each comma-separated line is parsed in place: columns 1, 3, 5 and 6
    become floats, column 4 (the value used for ranking and summing)
    becomes an int, and column 2 names the category ('from' or 'to').
    The grid id, lng index, lat index and a running node id are appended
    to every record.

    Side effects: the kept records are appended to
    ``self.recDict[category][str(gid)]`` and ``self.pickUpSeeds`` is
    called with deep copies of the kept records and of the per-cell
    record dictionary.

    :param self:
    :param ifile: path of the input file
    :return: number of lines read, integer-divided by 2
    :raises KeyError: when column 2 of a line is neither 'from' nor 'to'
    """
    cateKeys = self.cateKeys

    # Per category: summed column-4 value per cell, the same data as a
    # list of [gid, value] pairs, and the records of every cell
    gridDevNumDict = {'from': {}, 'to': {}}
    gridDevNumList = {'from': [], 'to': []}
    gridRecsDict = {'from': {}, 'to': {}}

    nodeID = 0
    with open(ifile, 'rb') as f:
        for line in f:
            linelist = line.strip('\n').split(',')

            # NOTE(review): column 0 is passed on as the raw string; its
            # float conversion is disabled in the original. Confirm that
            # getFormatGID accepts it.
            # linelist[0] = float(linelist[0])
            linelist[1] = float(linelist[1])
            formatGID = getFormatGID([linelist[0], linelist[1]])
            gid = formatGID['gid']

            gdirStr = linelist[2]
            linelist[6] = float(linelist[6])
            linelist[5] = float(linelist[5])
            linelist[3] = float(linelist[3])
            linelist[4] = int(linelist[4])
            linelist.extend(
                [gid, formatGID['lngind'], formatGID['latind'], nodeID])
            nodeID += 1

            strGID = str(gid)
            devNums = gridDevNumDict[gdirStr]
            devNums[strGID] = devNums.get(strGID, 0) + linelist[4]
            gridRecsDict[gdirStr].setdefault(strGID, []).append(linelist[:])

    # Build the per-category [gid, value] list
    for cateName in cateKeys.values():
        for key, val in gridDevNumDict[cateName].items():
            gridDevNumList[cateName].append([key, val])

    # Keep the top N records of every cell; -1 means "practically all"
    N = self.custom_params['grid_dirnum']
    if N == -1:
        N = 999

    res = {'from': [], 'to': []}
    for cateName in cateKeys.values():
        for reclist in gridRecsDict[cateName].values():
            # In-place sort: gridRecsDict keeps the sorted order as well
            reclist.sort(key=lambda x: x[4], reverse=True)
            res[cateName] += reclist[:N]

    # File the kept records under their grid id; index -4 is the gid
    # appended during parsing
    for cateName in cateKeys.values():
        for currentLine in res[cateName]:
            gidStr = str(currentLine[-4])
            self.recDict[cateName].setdefault(gidStr, []).append(currentLine)

    resInAll = copy.deepcopy(res)
    resByGID = copy.deepcopy(gridRecsDict)
    self.pickUpSeeds(resInAll, gridDevNumList, resByGID)

    return nodeID // 2