Exemple #1
0
    def iterateFile(self, file):
        """Walk a comma-separated trip file and report every movement between
        consecutive points of the same trip to ``self.updateResByLine``.

        Columns read from each record: ``[2]`` timestamp (parsed as int),
        ``[3]`` latitude, ``[4]`` longitude, and ``[5]``, ``[6]``, ``[8]``,
        ``[9]`` which together form the trip identifier.

        A pair of consecutive records is skipped when the position did not
        change or when both records carry the same timestamp (which would
        also make the speed computation divide by zero).  In that case the
        remembered "from" point is intentionally left untouched, exactly as
        before.

        :param file: path of the file to read.
        :return: None.  Progress information is printed to stdout.
        """
        print("Delta for current running %f" % self.delta)
        count = 0

        # ``None`` never equals a trip identifier string, so the very first
        # record is handled by the ordinary "new trip" branch below.
        currentNo = None
        fromLat = fromLng = fromTime = None

        # Text mode: every line is parsed as text.  The ``with`` block closes
        # the handle, so no explicit close() is needed afterwards.
        with open(file, 'r') as f:
            for line in f:
                line = line.rstrip('\r\n')
                if not line.strip():
                    # Blank lines carry no record; skipping them avoids an
                    # IndexError on the column lookups below.
                    continue
                count += 1
                linelist = line.split(',')

                # Trip identifier
                no = "%s-%s-%s-%s" % (linelist[5], linelist[6], linelist[8],
                                      linelist[9])
                toLat = linelist[3]
                toLng = linelist[4]
                toTime = int(linelist[2])

                if currentNo == no:  # still inside the same trip
                    samePlace = fromLat == toLat and fromLng == toLng
                    if samePlace or fromTime == toTime:
                        # Nothing moved (or no time elapsed): keep the old
                        # "from" point and look at the next record.
                        continue

                    fPoint = [float(fromLng), float(fromLat)]
                    tPoint = [float(toLng), float(toLat)]

                    fromGid = getFormatGID(fPoint)['gid']
                    toGid = getFormatGID(tPoint)['gid']
                    # NOTE(review): the raw column strings (not the floats
                    # above) are passed on, as in the original code.
                    distance = getRealDistance(fromLng, fromLat, toLng,
                                               toLat)
                    speed = distance / (toTime - fromTime)
                    # Per the original comment: one of the characters
                    # w / n / s / e.
                    direction = getDirection(fPoint, tPoint)

                    self.updateResByLine(fPoint, tPoint, fromGid, toGid,
                                         direction, speed)
                else:  # first point of a new trip
                    currentNo = no

                # The current point becomes the start of the next segment.
                fromLat, fromLng, fromTime = toLat, toLng, toTime

        print("Total %d records in this file." % count)
def main():
    """Count 'src' and 'dst' records per grid cell, one output file per input.

    For every index in ``startIndex..endIndex`` the file
    ``readPath + "traveldata-<index>"`` is read.  Column ``[4]``/``[3]``
    (longitude/latitude) of each record is mapped to a grid id with
    ``getFormatGID`` and column ``[5]`` decides whether the record counts as
    a departure (``"src"``) or an arrival (``"dst"``).  The result is written
    to ``writePath + "<index % 24>-<index>"`` as lines of
    ``gid,from_count,to_count`` for every gid from 0 up to the largest gid
    seen.

    Uses and resets the module-level ``MAX_INDEX``; prints the elapsed time.
    """
    global MAX_INDEX
    start_time = time.time()

    for file_index in range(startIndex, endIndex + 1):
        filename_r = "traveldata-" + str(file_index)
        filename_w = str(file_index % 24) + "-" + str(file_index)

        # Sparse per-file counters; cells that never occur are reported as 0
        # when the result is written.
        from_counts = {}
        to_counts = {}

        # Stream the file line by line: the handle is closed reliably and a
        # final record without a trailing newline is no longer dropped.
        with open(readPath + filename_r, 'r') as source:
            for record in source:
                record = record.rstrip("\n")
                if not record:
                    continue
                columns = record.split(",")
                gridId = getFormatGID(
                    [columns[4], columns[3]], 0.0064, 0.005, {
                        'north': 40.2500,
                        'south': 38.5667,
                        'west': 116.7167,
                        'east': 118.3233,
                    })["gid"]

                if gridId > MAX_INDEX:
                    MAX_INDEX = gridId
                if columns[5] == "src":
                    from_counts[gridId] = from_counts.get(gridId, 0) + 1
                elif columns[5] == "dst":
                    to_counts[gridId] = to_counts.get(gridId, 0) + 1

        with open(writePath + filename_w, 'w') as resultFile:
            for i in range(MAX_INDEX + 1):
                resultFile.write("%s,%s,%s\n" % (i, from_counts.get(i, 0),
                                                 to_counts.get(i, 0)))
        # Each output file only covers the gids of its own input file.
        MAX_INDEX = 0

    print("Complete Time:" + str(time.time() - start_time))
def main():
    """Count distinct 'src' and 'dst' stay records per grid cell over all files.

    Every file ``readPath + "traveldata-<index>"`` for ``index`` in
    ``startIndex..endIndex`` is read.  Column ``[4]``/``[3]``
    (longitude/latitude) of each record is mapped to a grid id with
    ``getFormatGID``.  A record is identified by ``columns[1]-columns[2]``;
    each identifier is counted at most once as ``"src"`` and at most once as
    ``"dst"`` (column ``[5]``) across the whole run.

    A single result file ``writePath + "4175-4175"`` is written with lines
    of ``gid,from_count,to_count`` for every gid from 0 up to the largest gid
    seen.  Uses and resets the module-level ``MAX_INDEX``; prints the elapsed
    time.
    """
    global MAX_INDEX
    start_time = time.time()

    # Sparse counters; cells that never occur are reported as 0 on output.
    from_counts = {}
    to_counts = {}

    # Record identifiers already counted for each direction.
    seenSrc = set()
    seenDst = set()

    for file_index in range(startIndex, endIndex + 1):
        filename_r = "traveldata-" + str(file_index)

        # Stream the file line by line: the handle is closed reliably and a
        # final record without a trailing newline is no longer dropped.
        with open(readPath + filename_r, 'r') as source:
            for record in source:
                record = record.rstrip("\n")
                if not record:
                    continue
                columns = record.split(",")
                gridId = getFormatGID([columns[4], columns[3]])["gid"]

                key = str(columns[1]) + '-' + str(columns[2])

                if gridId > MAX_INDEX:
                    MAX_INDEX = gridId
                if columns[5] == "src" and key not in seenSrc:
                    seenSrc.add(key)
                    from_counts[gridId] = from_counts.get(gridId, 0) + 1
                elif columns[5] == "dst" and key not in seenDst:
                    seenDst.add(key)
                    to_counts[gridId] = to_counts.get(gridId, 0) + 1

    filename_w = str(4175) + "-" + str(4175)
    with open(writePath + filename_w, 'w') as resultFile:
        for i in range(MAX_INDEX + 1):
            resultFile.write("%s,%s,%s\n" % (i, from_counts.get(i, 0),
                                             to_counts.get(i, 0)))
    MAX_INDEX = 0

    print("Complete Time:" + str(time.time() - start_time))
    def iterateFileNew(self, file):
        """Walk a comma-separated file whose records each describe two
        movement segments and report them to ``self.updateResByLine``.

        Columns read from each record: ``[1]`` device id; ``[5]``, ``[6]``,
        ``[7]`` longitude, latitude and time of the first point; ``[4]``,
        ``[3]``, ``[2]`` longitude, latitude and time of the middle point;
        ``[8]``, ``[9]``, ``[10]`` longitude, latitude and time of the last
        point.  The "left" segment runs first -> middle, the "right" segment
        middle -> last.

        For every device, only the first segment that starts at a given
        position (``"<lat>-<lng>"`` of the segment start) is reported.

        :param file: path of the file to read.
        :return: None.  Progress information is printed to stdout.
        """
        print("Delta for current running %f" % self.delta)
        count = 0

        # Column indices of (fromLng, fromLat, fromTime, toLng, toLat, toTime)
        # for the left and the right segment of a record.
        segments = ((5, 6, 7, 4, 3, 2), (4, 3, 2, 8, 9, 10))

        # device id -> set of start positions already reported
        deviceDirectionDict = {}

        # Text mode: every line is parsed as text.  The ``with`` block closes
        # the handle, so no explicit close() is needed afterwards.
        with open(file, 'r') as f:
            for line in f:
                line = line.rstrip('\r\n')
                if not line.strip():
                    # Blank lines carry no record; skipping them avoids an
                    # IndexError on the column lookups below.
                    continue
                count += 1
                linelist = line.split(',')

                currentDevice = linelist[1]
                seenPositions = deviceDirectionDict.setdefault(
                    currentDevice, set())

                for fLngI, fLatI, fTimeI, tLngI, tLatI, tTimeI in segments:
                    fromLat = linelist[fLatI]
                    fromLng = linelist[fLngI]
                    fromTime = int(linelist[fTimeI])

                    toLat = linelist[tLatI]
                    toLng = linelist[tLngI]
                    toTime = int(linelist[tTimeI])

                    samePlace = fromLat == toLat and fromLng == toLng
                    if samePlace or fromTime == toTime:
                        # NOTE(review): as in the original code, a degenerate
                        # left segment also skips the right segment of the
                        # same record -- confirm this is intended.
                        break

                    fPoint = [float(fromLng), float(fromLat)]
                    tPoint = [float(toLng), float(toLat)]

                    fromGid = getFormatGID(fPoint, self.LngSPLIT,
                                           self.LatSPLIT, self.locs)['gid']
                    toGid = getFormatGID(tPoint, self.LngSPLIT,
                                         self.LatSPLIT, self.locs)['gid']
                    # NOTE(review): the raw column strings (not the floats
                    # above) are passed on, as in the original code.
                    distance = getRealDistance(fromLng, fromLat, toLng,
                                               toLat)
                    speed = distance / (toTime - fromTime)
                    # Per the original comment: one of the characters
                    # w / n / s / e.
                    direction = getDirection(fPoint, tPoint)

                    position = fromLat + '-' + fromLng

                    if position not in seenPositions:
                        # This start position has not been recorded for the
                        # device yet, so record its direction now.
                        seenPositions.add(position)
                        self.updateResByLine(fPoint, tPoint, fromGid, toGid,
                                             direction, speed)

        print("Total %d records in this file." % count)
    # Fragment of a function body: the enclosing ``def`` (which provides
    # ``hourId``) is not part of this snippet.
    # Derive the input and output file names from the hour id.
    ifilename = "traveldata-" + str(hourId)
    ofilename = str(hourId % 24) + "-" + str(hourId)

    ifile = os.path.join(INPUT_PATH, ifilename)

    # res[gid] = number of distinct device ids seen in grid cell ``gid``.
    res = [0] * 104000
    # Iterate over the input file
    with open(ifile, 'rb') as f:
        # gid -> list of device ids already counted for that cell
        devIdDict = {}
        for line in f:
            linelist = line.split(',')

            lat = linelist[3]
            lng = linelist[4]

            gid = getFormatGID([lng, lat])['gid']
            devId = int(linelist[1])

            # Count each device at most once per grid cell.
            # NOTE(review): membership test on a list is linear in the number
            # of devices already stored for the cell.
            if devIdDict.has_key(gid):
                if devId not in devIdDict[gid]:
                    devIdDict[gid].append(devId)
                    res[gid] += 1
            else:
                devIdDict[gid] = [devId]
                res[gid] += 1
            #res[gid] += 1
    # NOTE(review): redundant -- the ``with`` block above already closed f.
    f.close()
    # Write to file
    # One empty list per grid cell; how it is filled is not shown here.
    preline=[[] for j in xrange(len(res))];

    ofile = os.path.join(OUTPUT_PATH, ofilename)
# -*- coding: utf-8 -*-
__author__ = 'lenovo'

from util.tripFlow.base import getFormatGID
from util.tripFlow.base import getDirection
from util.tripFlow.extractGridEdges import ExtractGridEdges

# Scratch script: feeds one hand-made segment through ExtractGridEdges and
# prints the resulting 'from' / 'to' buckets.
# Points are passed as [lng, lat] to the helpers elsewhere in this file --
# presumably the same convention here; verify.
tPoint = [161.12, 40.35]
fPoint = [161.57, 39.79]

# Properties handed to the ExtractGridEdges constructor.
PROP = {
    'index': 9,
    'delta': -1,
    'IDIRECTORY': '/datahouse',
    'ODIRECTORY': '/datahouse',
    'suffix': 1
}
task = ExtractGridEdges(PROP)
direction = getDirection(fPoint, tPoint)
fromGid = getFormatGID(fPoint)['gid']
toGid = getFormatGID(tPoint)['gid']
print(direction)
# Arbitrary speed value for the single test segment.
speed = 2.3
task.updateResByLine(fPoint, tPoint, fromGid, toGid, direction, speed)

print(task.resByCate['from'])
print(task.resByCate['to'])

# Demonstrates that the 'from' and 'to' directions are the same
# point4 = [116.69815063476564, 39.78980820192016]
# point5 = [116.45507812500001,39.91097066634995]
# point6 = [116.28890991210939,39.96204017008559]
# point7 = [116.38092041015626,39.86407956071788]
# point8 = [115.97442626953126,40.48109956299565]
# point9 = [ 116.33525848388673,39.953265311384946]
# point10 = [116.43379211425783,39.977999795258164]
# point11 = [116.60828590393068,39.975775866942584]
# point12 = [116.60828590393068,39.974970137530484]
# point13 =[116.55749559402467,  39.94529952809171]
# point14 = [116.3978409538867,39.90590128660034]
point15 = [116.4556, 39.9095]
#points = [point1, point2, point3, point4, point5, point6, point7, point8, point9, point10]
# Only point15 is currently selected.
points = [point15]

# Grid id of every selected point.
gridIds = [getFormatGID(point)['gid'] for point in points]
#gridIds = [i for i in range(104000)]
#print(gridIds)
# Hours to inspect.
hourIds = [12]
#hourIds = [i for i in range(24)]

# Inclusive range of input file indices and the directory they are read from.
startIndex = 167
endIndex = 2088
readPath = '/datahouse/tripflow/Anomaly/bj-byhour-statics/'



#stay_arr = [[[] for j in range(len(points))] for i in range(len(hourIds))]
# One empty list per (hour, grid id) pair.
travel_arr = [[[] for j in range(len(gridIds))] for i in range(len(hourIds))]
#total_arr = [[[] for j in range(len(points))] for i in range(len(hourIds))]
#travel_arr = [0 for j in range(len(gridIds))]
Exemple #8
0
    def getNextGIDs(self, point, direction):
        """Return the first ``jump_length`` grid-line crossings of a ray.

        The ray starts at ``point`` and follows ``direction``.  Crossings with
        latitude grid lines and with longitude grid lines are computed
        separately, spaced ``LatSPLIT`` / ``LngSPLIT`` apart and measured
        from the centre of the starting cell.  The ``jump_length`` crossings
        nearest to the start (compared by longitude along the travel
        direction) are kept.

        :param point: ``[lng, lat, gid]`` of the starting position.
        :param direction: ``[x, y]`` direction vector (x along longitude,
            y along latitude).
        :return: dict with
            ``'res'``: list of ``[lng, lat, gid]``, one per kept crossing,
            where the coordinate has been nudged by 0.002 across the grid
            line that was crossed and ``gid`` is the cell of the nudged
            point;
            ``'endPoints'``: ``[lng, lat]`` of the nearest crossing
            (not nudged).
        :raises IndexError: when no crossing exists, i.e. ``direction`` is
            ``[0, 0]`` or ``jump_length`` is 0 (unchanged behaviour).
        """
        x, y = direction
        fromLng = point[0]
        fromLat = point[1]
        fromGID = point[2]

        parsedObj = parseFormatGID(fromGID)
        baseLatCenter = parsedObj['lat']
        baseLngCenter = parsedObj['lng']

        latDir = 1 if y > 0 else -1
        lngDir = 1 if x > 0 else -1

        jumpLength = self.custom_params['jump_length']

        # Every crossing is stored as [lng, lat, crossesLatLine].  An explicit
        # flag is required: the previous step-index tag was 0 for the first
        # longitude-line crossing as well, so that crossing was mistaken for
        # a latitude-line crossing and nudged along the wrong axis.
        jumpPoints = []

        if y != 0:  # crossings with latitude grid lines
            # float() guards against integer division on Python 2.
            k = float(x) / y
            latSplit = self.custom_params['LatSPLIT']
            for i in range(jumpLength):
                incrementLat = (baseLatCenter + latSplit * (0.5 + i) * latDir
                                - fromLat)
                iLng = fromLng + incrementLat * k
                iLat = incrementLat + fromLat
                jumpPoints.append([iLng, iLat, True])

        if x != 0:  # crossings with longitude grid lines
            k = float(y) / x
            lngSplit = self.custom_params['LngSPLIT']
            for i in range(jumpLength):
                incrementLng = (baseLngCenter + lngSplit * (0.5 + i) * lngDir
                                - fromLng)
                iLat = fromLat + incrementLng * k
                iLng = incrementLng + fromLng
                jumpPoints.append([iLng, iLat, False])

        bothAxes = x != 0 and y != 0

        if bothAxes:
            # Merge the two families: each longitude-line crossing replaces
            # the farthest crossing currently held in the first jumpLength
            # slots, provided that one lies farther away than the candidate.
            for i in range(jumpLength, jumpLength * 2):
                candidate = jumpPoints[i]
                farthestIndex = -1
                farthestLng = candidate[0]

                for slot in range(jumpLength):
                    slotLng = jumpPoints[slot][0]
                    if slotLng * lngDir > farthestLng * lngDir:
                        farthestIndex = slot
                        farthestLng = slotLng

                if farthestIndex != -1:
                    jumpPoints[farthestIndex] = candidate[:]

        # Crossing nearest to the start along the travel direction.
        minLng = jumpPoints[0][0]
        minLat = jumpPoints[0][1]
        if bothAxes:
            for i in range(1, jumpLength):
                if minLng * lngDir > jumpPoints[i][0] * lngDir:
                    minLng = jumpPoints[i][0]
                    minLat = jumpPoints[i][1]

        # Only the first jumpLength slots are reported.
        res = []
        for ilng, ilat, crossesLatLine in jumpPoints[:jumpLength]:
            # 0.002 is a small offset that pushes the crossing into the cell
            # beyond the grid line instead of leaving it on the boundary.
            if crossesLatLine:
                ilat += 0.002 * latDir
            else:
                ilng += 0.002 * lngDir

            gid = getFormatGID([ilng, ilat])['gid']
            res.append([ilng, ilat, gid])

        return {
            'res': res,
            'endPoints': [minLng, minLat]
        }
Exemple #9
0
    def iterateFile(self, ifile):
        """Read a record file, group the records by grid cell and direction,
        and hand the result to ``self.pickUpSeeds``.

        Columns read from each record: ``[0]``/``[1]`` are converted to float
        and passed to ``getFormatGID`` as ``[linelist[0], linelist[1]]``;
        ``[2]`` is the category key (``'from'`` or ``'to'``); ``[3]``,
        ``[5]``, ``[6]`` are converted to float; ``[4]`` is converted to int
        and used as the per-record device count.  ``gid``, ``lngind``,
        ``latind`` and a running node id are appended to every record.

        Side effects: for every category only the ``grid_dirnum`` records
        with the largest device count per grid cell are kept (999 when
        ``grid_dirnum`` is -1); those records are appended to
        ``self.recDict[category][str(gid)]``, and ``self.pickUpSeeds`` is
        called with deep copies of the kept records and of the full per-cell
        record dictionary plus the ``[gid, device_count]`` list.

        :param ifile: path of the file to read.
        :return: half the number of records read, rounded down.
        """
        cateKeys = self.cateKeys

        # Per category: gid -> summed device count, the same data as a list
        # of [gid, count] pairs, and gid -> list of records.
        gridDevNumDict = {'from': {}, 'to': {}}
        gridDevNumList = {'from': [], 'to': []}
        gridRecsDict = {'from': {}, 'to': {}}

        nodeID = 0
        # Text mode: every line is parsed as text.  The ``with`` block closes
        # the handle, so no explicit close() is needed afterwards.
        with open(ifile, 'r') as f:
            for line in f:
                line = line.strip('\n')
                if not line.strip():
                    # Blank lines carry no record; skipping them avoids a
                    # ValueError on the float() conversions below.
                    continue
                linelist = line.split(',')

                linelist[0] = float(linelist[0])
                linelist[1] = float(linelist[1])
                formatGID = getFormatGID([linelist[0], linelist[1]])
                gid = formatGID['gid']
                lngind = formatGID['lngind']
                latind = formatGID['latind']
                gdirStr = linelist[2]

                linelist[6] = float(linelist[6])
                linelist[5] = float(linelist[5])

                linelist[3] = float(linelist[3])
                linelist[4] = int(linelist[4])

                linelist.extend([gid, lngind, latind, nodeID])
                nodeID += 1

                strGID = str(gid)
                devNums = gridDevNumDict[gdirStr]
                # Direct dict membership instead of scanning .keys().
                if strGID in devNums:
                    devNums[strGID] += linelist[4]
                    gridRecsDict[gdirStr][strGID].append(linelist[:])
                else:
                    devNums[strGID] = linelist[4]
                    gridRecsDict[gdirStr][strGID] = [linelist[:]]

        # Build the [gid, device count] list for every category.
        for cateName in cateKeys.values():
            gridDevNumList[cateName].extend(
                [key, val] for key, val in gridDevNumDict[cateName].items())

        # Keep only the top N records per grid cell.
        N = self.custom_params['grid_dirnum']
        if N == -1:
            # NOTE(review): -1 is treated as a cap of 999 records per cell,
            # as in the original code.
            N = 999

        res = {'from': [], 'to': []}

        for cateName in cateKeys.values():
            for reclist in gridRecsDict[cateName].values():
                # Sort in place by device count, largest first.
                reclist.sort(key=lambda rec: rec[4], reverse=True)
                res[cateName] += reclist[:N]

        # Register the kept records under their gid (fourth field from the
        # end, appended above) in self.recDict.
        for cateName in cateKeys.values():
            bucket = self.recDict[cateName]
            for currentLine in res[cateName]:
                bucket.setdefault(str(currentLine[-4]), []).append(
                    currentLine)

        resInAll = copy.deepcopy(res)
        resByGID = copy.deepcopy(gridRecsDict)
        self.pickUpSeeds(resInAll, gridDevNumList, resByGID)

        return nodeID // 2