def getGridList(self):
        """Fetch every city grid from MongoDB and split them by validity.

        Reads ``properties.typevalid``, ``properties.uid`` and
        ``properties.vec`` from each document of the collection named by
        ``self.db['gridcolname']`` in database ``self.db['dbname']``.

        Returns:
            tuple: ``(falseuidlist, truegridobj)``. ``falseuidlist`` holds the
            uid (as string) of every grid whose ``typevalid`` equals False;
            ``truegridobj`` maps the uid string of every other grid to its
            ``vec``.
        """
        conn, mdb = connectMongo(self.db['dbname'])

        falseuidlist = []
        truegridobj = {}

        try:
            cursor = mdb[self.db['gridcolname']].find({}, {
                "properties.typevalid": 1,
                "properties.uid": 1,
                "properties.vec": 1
            })

            # stream the cursor instead of copying all documents into a list
            for doc in cursor:
                props = doc['properties']
                typevalid = props['typevalid']
                uid = str(props['uid'])
                vec = props['vec']

                # explicit comparison on purpose: only values equal to False
                # mark a grid invalid, a None flag still counts as valid
                if typevalid == False:
                    falseuidlist.append(uid)
                else:
                    truegridobj[uid] = vec
        finally:
            # release the connection even when the query or a document fails
            conn.close()

        return falseuidlist, truegridobj
Example #2
0
	def __init__(self, city, citylocs, defaultRadius, IP):
		"""Store the city configuration and create the MongoDB indexes.

		Args:
			city (string): city name, used as suffix of the grid and POI
				collection names
			citylocs (object): stored unchanged as ``self.citylocs``
			defaultRadius (number): stored doubled as ``self.defaultRadius``
			IP (string): stored as ``self.db['url']``

		Raises:
			Exception: whatever ``create_index`` raises is propagated
				unchanged after the connection has been closed.
		"""
		super(CityGrid, self).__init__()
		self.city = city
		self.citylocs = citylocs
		self.defaultRadius = defaultRadius * 2
		self.maxQRadius = 500
		self.db = {
			'url': IP,
			'port': 27017,
			'dbname': 'tdnormal',
			'gridcolname': 'newgrids_%s' % city,
			'POIcolname': 'pois_%s' % city
		}

		conn, db = connectMongo(self.db['dbname'])
		try:
			grid = db[self.db['gridcolname']]
			POIs = db[self.db['POIcolname']]

			grid.create_index([
				("properties.typevalid", pymongo.ASCENDING),
				("properties.vec", pymongo.ASCENDING)
			])
			# 2dsphere indexes on the center points of grids and POIs
			grid.create_index([("properties.center", pymongo.GEOSPHERE)])
			POIs.create_index([("properties.center", pymongo.GEOSPHERE)])
		finally:
			# always release the connection, also when index creation fails
			conn.close()
Example #3
0
def aggregateEntropy(type, dbName, collectionName, data):
	"""Placeholder for temporal / spatial entropy aggregation.

	Loads the ``_id`` and ``vec`` fields of every document in the given
	collection. Both the 'temporal' and the 'spatial' branch are still
	unimplemented, so nothing is computed, stored or returned.

	Args:
		type (string): aggregation mode, 'temporal' or 'spatial'
		dbName (string): MongoDB database name
		collectionName (string): collection to read from
		data: currently unused

	Returns:
		None
	"""
	conn, db = connectMongo(dbName)
	try:
		rawData = list(db[collectionName].find({}, {'_id': 1, 'vec': 1}))

		if type == 'temporal':
			pass  # TODO: temporal aggregation is not implemented yet
		elif type == 'spatial':
			pass  # TODO: spatial aggregation is not implemented yet
	finally:
		# release the connection even when the query fails
		conn.close()
    def sepFeatureTaskes(self, falseuidlist, truegridobj, userlist):
        """Fan the feature aggregation out over parallel-python jobs.

        The feature collection is emptied first. Then one
        ``aggregateVector`` job is submitted for every entry of
        ``userlist`` and the method waits until each job has returned.

        Args:
            falseuidlist (array): false grid uid list
            truegridobj (object): true grid object collection
            userlist (array): one entry per job, each passed to
                ``aggregateVector`` as its user list

        Returns:
            None
        """
        # start from an empty feature collection
        conn, mdb = connectMongo(self.db['dbname'])
        mdb[self.db['featurecolname']].remove({})
        conn.close()

        ppservers = ()
        job_server = pp.Server(ppservers=ppservers)
        logging.info("pp 可以用的工作核心线程数 %s workers" % job_server.get_ncpus())
        start_time = time.time()

        jobs = []
        for index, sublist in enumerate(userlist):
            # positional arguments handed to aggregateVector
            job_args = (sublist, falseuidlist, truegridobj)
            # functions the submitted job depends on
            depfuncs = (
                self.vecAdd,
                connectMongo,
                connectMYSQL,
                getCityLocs,
                initTimePeriods,
                judFeatureTP,
            )
            # module names made available to the submitted job
            modules = (
                "os",
                "time",
                "pp",
                "CommonFunc",
                "numpy",
                "pymongo",
                "MySQLdb",
                "logging",
                "sys",
                "gc",
            )
            submitted = job_server.submit(self.aggregateVector, job_args,
                                          depfuncs, modules)
            jobs.append((index, submitted))

        # calling a job object blocks until that job has finished
        for _, job in jobs:
            job()

        logging.info("多线程下执行耗时: %ss" % str(time.time() - start_time))
        job_server.print_stats()
Example #5
0
def upDotsBlongedDis(city, dic):
    """Assign every grid of a city to the district index of its center.

    Args:
        city (string): city name; selects the ``newgrids_<city>``
            collection and, through ``getAbbName``, the district file
        dic (string): directory containing ``<abbreviation>.json``, a
            GeoJSON-like file with a ``features`` list

    Returns:
        list: ``[uid, index]`` pairs for every grid whose center was given
        an index other than -1 by ``getDisIndex``.
    """
    # fetch uid and center of all grids, then release the connection
    conn, db = connectMongo('tdnormal')
    try:
        dbgrids = list(db['newgrids_%s' % city].find({}, {
            "properties.uid": 1,
            "properties.center": 1
        }))
    finally:
        conn.close()

    # build the district fence array: name plus geometry of every feature
    with open(os.path.join(dic, getAbbName(city) + '.json')) as f:
        features = json.load(f)['features']
    disobjs = [{
        'name': each['properties']['name'],
        'geo': shape(each['geometry'])
    } for each in features]

    grids = []  # result pairs
    count, validcount = 0, 0
    for each in dbgrids:
        try:
            coords = each['properties']['center']['coordinates']
            point = Point(coords[0], coords[1])
            index = getDisIndex(point, disobjs)
            count += 1
            if index != -1:
                validcount += 1
                # progress report every 10000 matched grids
                if validcount % 10000 == 0:
                    print('Valid Grids num: %d, Total Grids num: %d' % (
                        validcount, count))

                grids.append([each['properties']['uid'], index])
        except Exception as e:
            # best effort: report the offending document and keep going
            print(each)
            print(e)
            continue

    print("City %s owns valid distict grids %d" % (city, len(grids)))
    return grids
Example #6
0
def calUsersEntropy(dbName, collectionName):
	"""Compute column/row entropy of every user's ``pVec`` and store it.

	For each document the last element of every ``pVec`` row is dropped,
	the remaining matrix is summed by column and by row, and the entropy
	of both sums is written back as ``entropy.col`` / ``entropy.row``.
	When the column sum comes back as a plain int both values are stored
	as -1.

	Args:
		dbName (string): MongoDB database name
		collectionName (string): collection holding the user features

	Returns:
		None
	"""
	conn, db = connectMongo(dbName)
	try:
		collection = db[collectionName]

		# get all user features
		rawData = list(collection.find({}, {'pVec': 1}))
		print('Query %s data in %s' % (str(len(rawData)), collectionName))

		if not rawData:
			# executing a bulk operation without any queued update raises
			# InvalidOperation, so there is nothing to do for an empty set
			return

		# update entropy values
		bulk = collection.initialize_ordered_bulk_op()
		for item in rawData:
			pVec = [each[:-1] for each in item['pVec']]

			# sum results by columns and by rows
			pVecColSum = getMatrixSumbyDim(pVec, 'column')
			pVecRowSum = getMatrixSumbyDim(pVec, 'row')

			# NOTE(review): a plain int presumably is the failure marker of
			# getMatrixSumbyDim -- confirm against its implementation
			if type(pVecColSum) is not int:
				entropy = {
					'col': sc.entropy(pVecColSum),
					'row': sc.entropy(pVecRowSum)
				}
			else:
				entropy = {'col': -1, 'row': -1}

			bulk.find({'_id': int(item['_id'])}).update({'$set': {
				'entropy': entropy
			}})

		# send all queued updates to mongoDB at once
		result = bulk.execute()
		print(result)
	finally:
		# release the connection even when a computation or update fails
		conn.close()
Example #7
0
def main(argv):
    """Command line entry point: process the data files, then merge results.

    Options:
        -h, --help        print usage and exit
        -c, --city        city name (default 'beijing')
        -d, --directory   data directory
        -n, --number      file number, parsed as int (default 999)
        -m, --mode        'all' runs the 20 worker processes before the
                          merge; any other value only merges

    Args:
        argv (list): command line arguments without the program name
    """
    try:
        opts, args = getopt.getopt(
            argv, "hc:d:n:m:",
            ["help", "city=", 'directory=', 'number=', 'mode='])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    # handle input parameters
    city, directory, number, mode = 'beijing', '/enigma/tao.jiang/datasets/JingJinJi', 999, 'all'
    for opt, arg in opts:
        # the long form is declared for getopt, so it has to be handled too
        if opt in ('-h', '--help'):
            usage()
            sys.exit()
        elif opt in ("-c", "--city"):
            city = arg
        elif opt in ("-d", "--directory"):
            directory = arg
        elif opt in ('-n', '--number'):
            number = int(arg)
        elif opt in ('-m', '--mode'):
            mode = arg

    STARTTIME = time.time()
    print("Start approach at %s" % STARTTIME)

    # load the grid information once; it is shared by all workers
    conn, db = connectMongo('tdnormal')
    try:
        GRIDSNUM = db['newgrids_%s' % city].count()
        gridsData, validIDs = getGridsFromMongo(city, db)
    finally:
        conn.close()

    CITYDISIND, CITYDISNUM = getCityDisInfo(city)

    if mode == 'all':
        # run the tasks in 20 separate processes
        DATA = {'gridsData': gridsData, 'validIDs': validIDs}
        jobs = []

        for x in xrange(0, 20):
            PROP = {
                'INDEX': x,
                'DIRECTORY': directory,
                'GRIDSNUM': GRIDSNUM,
                'CITY': city,
                'CITYDISIND': CITYDISIND,
                'CITYDISNUM': CITYDISNUM,
                'FILENUM': number
            }

            job = Process(target=processTask, args=(PROP, DATA))
            job.start()
            jobs.append(job)

        # wait for all processes to finish
        for job in jobs:
            job.join()

    # Start to merge result files
    MERGE = time.time()
    print("Start merge at %s" % MERGE)
    mergeMatrixFiles(city, GRIDSNUM, directory)
    print("End merge in %s" % str(time.time() - MERGE))

    ENDTIME = time.time()
    print("End approach at %s" % ENDTIME)
Example #8
0
	def gridGeneration(self, split, locs=None):
		"""Generate the city grid set and store it in MongoDB.

		The region is cut into squares of ``split`` degrees. For every
		square the POIs within ``self.maxQRadius`` of its center are
		queried; each POI adds the weight returned by ``gaussian2D`` to
		the entry of its ``ftype`` in an 11-element vector, which is then
		divided by the total weight. Grids are written to the grid
		collection in batches.

		Args:
			split (float): distance interval of lat and lng, the unit is degree
			locs (dict, optional): region with 'north', 'south', 'east'
				and 'west' keys; ``self.citylocs`` is used when omitted
				or empty

		Returns:
			None
		"""
		logging.info("CityGrid generation is starting...")

		if not locs:
			locs = self.citylocs

		batchsize = 100000  # documents buffered before each bulk insert
		tmparray = []
		# fixed offset from the south-west corner to the grid center;
		# it equals split / 2 only for split = 0.003
		centerincrement = 0.0015
		latnum = int((locs['north'] - locs['south']) / split + 1)
		lngnum = int((locs['east'] - locs['west']) / split + 1)

		conn, db = connectMongo(self.db['dbname'])
		try:
			grid = db[self.db['gridcolname']]
			POIs = db[self.db['POIcolname']]

			for latind in xrange(0, latnum):
				for lngind in xrange(0, lngnum):
					lat = round(locs['south'] + latind * split, 3)
					lng = round(locs['west'] + lngind * split, 3)
					lnginc = round(lng+split, 3)
					latinc = round(lat+split, 3)
					lngcen = round(lng+centerincrement, 4)
					latcen = round(lat+centerincrement, 4)
					# closed square ring: the geographic boundary of this grid
					coordsarr = [ [lng, lat], [lnginc, lat], [lnginc, latinc], [lng, latinc], [lng, lat] ]

					featurelistarray = [0]*11
					typevalid = False

					# query all the POIs no farther than maxQRadius from the center
					nearPOIList = list(POIs.find({
						"properties.center": {
							'$near': {
								'$geometry': { 'type': "Point", 'coordinates': [ lngcen, latcen ] },
								'$minDistance': 0,
								'$maxDistance': self.maxQRadius
							}
						}
					}))

					# construct vector with POIs types info
					poilen = len(nearPOIList)
					if poilen != 0:
						typevalid = True
						featurelistsum = 0

						for each in nearPOIList:
							cpoint = each["properties"]["center"]["coordinates"]
							radius = each["properties"]["radius"]
							# a POI without positive radius uses the default sigma
							sigma = radius * 2.0 if radius > 0 else self.defaultRadius
							P = gaussian2D([lngcen, latcen], cpoint, sigma)
							featurelistsum += P

							curPInd = each["properties"]["ftype"] - 1
							featurelistarray[curPInd] += P

						# normalise the vector; a zero total makes the grid invalid
						if featurelistsum:
							featurelistarray = [each/featurelistsum for each in featurelistarray]
						else:
							typevalid = False
							print(featurelistsum)

					# single feature format
					# uid: locates the grid index from its lat and lng index
					# vec: feature type vector
					# center: center position of the current grid
					tmparray.append({
						"type": "Feature",
						"_id": "%s-%s-%s" % (self.city, str(lat), str(lng)),
						"properties": {
							"id": "%s-%s-%s" % (self.city, str(lat), str(lng)),
							"type": "Polygon",
							"vecvalid": typevalid,
							"center": {"type": "Point", "coordinates": [lngcen, latcen]},
							"uid": int(lngind + latind * lngnum),
							"vec": featurelistarray,
							'poinum': poilen
						},
						"geometry": {
							"type": "Polygon",
							"coordinates": [ coordsarr ]
						}
					})

					# flush a full batch to keep memory bounded
					if len(tmparray) == batchsize:
						grid.insert(tmparray)
						tmparray = []
						gc.collect()
						logging.debug("100000 features has been inserted into mongoDB.")

			# write the remaining grids of the last, partial batch
			if tmparray:
				grid.insert(tmparray)

			logging.info("Grid generation complete!")
		finally:
			# release the connection even when a query or insert fails
			conn.close()
Example #9
0
def getValidGrids(city, dic):
    """Aggregate density-weighted POI vectors per district and dump them.

    For every grid with ``properties.vecvalid`` set that also has a row
    with ``wpnumber > 0`` in the ``<abbr>Ematrix`` MySQL table, the 11
    entries of its ``vec`` are multiplied by ``wpnumber`` and added to the
    totals of its district and of the whole city (key 'total'). The result
    is written as JSON to ``<abbr>_poidis.json`` in a fixed directory.

    Args:
        city (string): 'beijing', 'tianjin', 'zhangjiakou' or 'tangshan'
        dic: currently unused; the output directory is hard-coded

    Returns:
        None

    Raises:
        KeyError: if ``city`` is not one of the supported names.
    """
    collectname = 'newgrids_%s' % city

    citynames = {
        'beijing': 'bj',
        'tianjin': 'tj',
        'zhangjiakou': 'zjk',
        'tangshan': 'ts'
    }

    # fetch the POI distribution of all valid grids
    conn, mdb = connectMongo('tdnormal')
    try:
        poiDisRes = list(mdb[collectname].find({'properties.vecvalid': True}, {
            'properties.vec': 1,
            'properties.uid': 1
        }))
    finally:
        conn.close()

    # fetch density and district of every grid with records;
    # the table prefix comes from the fixed citynames mapping above
    db, cur = connectMYSQL('tdnormal')
    try:
        cur.execute(
            "SELECT id, dis, wpnumber AS 'num' from %sEmatrix WHERE wpnumber > 0;"
            % citynames[city])
        rows = cur.fetchall()
    finally:
        cur.close()
        db.close()

    # one accumulator per district plus one for the whole city
    res = {'total': [0.0] * 11}
    denarr = {}
    for each in rows:
        dis = str(each[1])
        denarr[str(each[0])] = {'dis': dis, 'num': long(each[2])}
        if dis not in res:
            res[dis] = [0.0] * 11

    print("sql ready")

    # walk the POI grids and update the accumulators
    for each in poiDisRes:
        uid = str(each['properties']['uid'])
        if uid not in denarr:
            continue

        density = denarr[uid]
        vec = each['properties']['vec']
        # every district found in denarr got its accumulator above
        disrow = res[density['dis']]
        for x in xrange(0, 11):
            increment = density['num'] * vec[x]
            disrow[x] += increment
            res['total'][x] += increment

    # store the result object; the with-block closes the file
    with open(
            os.path.join(
                '/home/joe/Documents/git/living-modes-visual-comparison/server/data/tmp',
                '%s_poidis.json' % citynames[city]), 'w+') as target:
        json.dump(res, target)
Example #10
0
    def matchUserRecords(self, split, jobID, userlist):
        """Match every user's records with the grid and store them in MongoDB.

        For each user all rows of the ``cbeijing`` MySQL table are read.
        Rows outside ``self.citylocs`` are skipped; the others are turned
        into point features carrying the uid of the grid they fall into
        and are inserted into the user collection in batches of 300 users.

        Args:
            split (float): distance interval of lat and lng, the unit is degree
            jobID (int): Job Number, only used in the log message
            userlist (array): userlist is consists of many tdid strings

        Returns:
            None
        """
        south, north = self.citylocs['south'], self.citylocs['north']
        west, east = self.citylocs['west'], self.citylocs['east']
        # number of grid columns, needed to flatten (latind, lngind) to a uid
        lngnum = int((east - west) / split + 1)

        # initialize database connections and data structure
        db, cur = connectMYSQL(self.db['mysqldb'])
        try:
            conn, mdb = connectMongo(self.db['dbname'])
            try:
                users = mdb[self.db['usercolname']]
                userrecords, usernum = [], 0

                for user in userlist:
                    usernum += 1
                    cur.execute(
                        "SELECT dayType, dateID, timeSegID, lat, lng FROM cbeijing WHERE tdid = %s",
                        (user, ))

                    # row format: dayType, dateID, timeSegID, lat, lng
                    for each in cur.fetchall():
                        dayType = each[0]
                        dateID, timeSegID = int(each[1]), int(each[2])
                        lat, lng = float(each[3]), float(each[4])

                        # a record outside the region grid is not valid
                        if not (south <= lat < north and west <= lng < east):
                            continue

                        latind = int((lat - south) / split)
                        lngind = int((lng - west) / split)
                        userrecords.append({
                            '_id': '%s-%s-%s' % (user, dateID, timeSegID),
                            'id': int(user),
                            'geometry': {
                                'type': 'Point',
                                'coordinates': [lng, lat]
                            },
                            'type': 'Feature',
                            'properties': {
                                'gridUID': int(lngind + latind * lngnum),
                                'daytype': dayType,
                                'dateid': dateID,
                                'timesegid': timeSegID
                            }
                        })

                    # flush after every 300 users
                    if usernum == 300:
                        logging.info(
                            "Job %s inserted 300 records into database." %
                            jobID)
                        # inserting an empty list raises, so skip batches
                        # in which no user had a record inside the region
                        if userrecords:
                            users.insert(userrecords)
                        userrecords = []
                        usernum = 0

                # records of the last, partial batch
                if userrecords:
                    users.insert(userrecords)
            finally:
                conn.close()
        finally:
            # release the MySQL handles even when a query or insert fails
            cur.close()
            db.close()
Example #11
0
    def aggregateVector(self, userlist, invalidlist, validobj):
        """Aggregate the behavior vectors of the given users into MongoDB.

        For each user the grid vectors of all records on valid grids are
        summed per time period (as chosen by ``judFeatureTP``) and divided
        by the number of records in that period. Users with at least one
        non-empty period are inserted into the feature collection in
        batches of 300.

        Args:
            userlist (array): user list
            invalidlist (array): false grid uid list (uid strings)
            validobj (object): true grid object collection, uid -> vector

        Returns:
            None

        Raises:
            KeyError: if a record refers to a grid uid that is neither in
                ``invalidlist`` nor in ``validobj``.
        """
        # set gives O(1) membership tests inside the per-record loop
        invalidset = set(invalidlist)

        conn, mdb = connectMongo(self.db['dbname'])
        try:
            features = mdb[self.db['featurecolname']]
            users = mdb[self.db['usercolname']]

            userveclist = []  # buffered user vectors awaiting insertion

            for user in userlist:
                # fresh per-period accumulators for the current user
                tpCol = initTimePeriods()
                tmpvecs = tpCol['tpVectors']
                typearr = tpCol['tpNames']

                user = int(user)
                reclists = list(
                    users.find({"id": user}, {
                        "properties.gridUID": 1,
                        "properties.timesegid": 1,
                        "properties.daytype": 1
                    }))

                for rec in reclists:
                    props = rec['properties']
                    uid = str(props['gridUID'])

                    # records on invalid grids are skipped
                    if uid in invalidset:
                        continue

                    vec = validobj[uid]
                    daytype = str(props['daytype'])
                    timesegid = int(props['timesegid'] / 10)

                    # a record may count towards several time periods
                    for eachvecInd in judFeatureTP(daytype, timesegid):
                        tmpvecs[eachvecInd]['num'] += 1
                        tmpvecs[eachvecInd]['vec'] = self.vecAdd(
                            tmpvecs[eachvecInd]['vec'], vec)

                # template of one user's feature document
                vectmpl = {
                    '_id': user,
                    'pVec': [],
                    'tpNumVec': [],
                    'totalNum': len(reclists)
                }

                # average every period; empty periods become zero vectors
                notallnull = False
                for name in typearr:
                    period = tmpvecs[name]
                    if period['num'] != 0:
                        notallnull = True
                        num = float(period['num'])
                        vectmpl['pVec'].append([v / num for v in period['vec']])
                        vectmpl['tpNumVec'].append(int(period['num']))
                    else:
                        vectmpl['pVec'].append([0] * len(period['vec']))
                        vectmpl['tpNumVec'].append(0)

                if notallnull:
                    userveclist.append(vectmpl)

                # flush a full batch and free its memory
                if len(userveclist) == 300:
                    features.insert(userveclist)
                    userveclist = []
                    gc.collect()

            if userveclist:
                features.insert(userveclist)
        finally:
            # release the connection even when a query or insert fails
            conn.close()
Example #12
0
def calGridAoiDis(city):
    """Count the POIs around every 0.05-degree grid of a city.

    Args:
        city (string): city name; selects the ``pois_<city>`` collection
            and, through ``getAbbName``, the district file

    Returns:
        list: one 15-element list per grid with at least one POI nearby.
        An ``ftype`` value f is counted at index f - 1 (presumably
        f is 1..11, filling indexes 0-10 -- confirm against the POI
        data), index 11 holds the total POI count, 12 and 13 the
        longitude and latitude of the grid center and 14 the district
        index returned by ``getDisIndex`` (-1 when none matched).
    """
    # initialise the grid layout
    locs = getCityLocs(city)
    grids = []

    split = 0.05
    centerincrement = 0.025  # round(split / 2.0, 4)
    latnum = int((locs['north'] - locs['south']) / split + 1)
    lngnum = int((locs['east'] - locs['west']) / split + 1)

    # build the district fence array; the with-block closes the file
    with open(
            os.path.join(
                '/home/taojiang/git/living-modes-visual-comparison/conf/data',
                getAbbName(city) + '.json')) as f:
        features = json.load(f)['features']
    disobjs = [{
        'name': each['properties']['name'],
        'geo': shape(each['geometry'])
    } for each in features]

    conn, db = connectMongo('tdnormal')
    try:
        POIs = db['pois_%s' % city]

        # query the POIs around every grid center and count them
        for latind in xrange(0, latnum):
            for lngind in xrange(0, lngnum):
                lat = round(locs['south'] + latind * split, 3)
                lng = round(locs['west'] + lngind * split, 3)
                lngcen = round(lng + centerincrement, 4)
                latcen = round(lat + centerincrement, 4)

                # all POIs within 2500 * sqrt(2) of the grid center
                nearPOIList = list(
                    POIs.find(
                        {
                            "properties.center": {
                                '$near': {
                                    '$geometry': {
                                        'type': "Point",
                                        'coordinates': [lngcen, latcen]
                                    },
                                    '$minDistance': 0,
                                    '$maxDistance': 2500 * (2**0.5)
                                }
                            }
                        }, {'properties': 1}))

                # grids without any POI are not reported
                if not nearPOIList:
                    continue

                tmpGrid = [0 for x in xrange(0, 15)]
                for each in nearPOIList:
                    curPInd = each['properties']['ftype'] - 1
                    tmpGrid[curPInd] += 1
                    tmpGrid[11] += 1

                tmpGrid[12] = lngcen
                tmpGrid[13] = latcen

                index = getDisIndex(Point(lngcen, latcen), disobjs)
                tmpGrid[14] = index
                if index == -1:
                    print('Invalid Grid Found.')

                grids.append(tmpGrid)
    finally:
        # release the connection even when a query fails
        conn.close()

    print("%s City with valid grids %s" % (city, str(len(grids))))
    return grids
def main(argv):
	"""Command line entry point: process the data files, then merge results.

	Options:
		-h, --help        print usage and exit
		-c, --city        city name (default 'zhangjiakou')
		-d, --directory   data directory
		-n, --number      file number, parsed as int (default 999)

	Args:
		argv (list): command line arguments without the program name
	"""
	# parse the command line
	try:
		opts, args = getopt.getopt(argv, "hc:d:n:", ["help", "city=", 'directory=', 'number='])
	except getopt.GetoptError as err:
		print(str(err))
		usage()
		sys.exit(2)

	# handle input parameters
	city, directory, number = 'zhangjiakou', '/home/tao.jiang/datasets/JingJinJi', 999
	for opt, arg in opts:
		# the long form is declared for getopt, so it has to be handled too
		if opt in ('-h', '--help'):
			usage()
			sys.exit()
		elif opt in ("-c", "--city"):
			city = arg
		elif opt in ("-d", "--directory"):
			directory = arg
		elif opt in ('-n', '--number'):
			number = int(arg)

	STARTTIME = time.time()
	print("Start approach at %s" % STARTTIME)

	# load the grid information: total count and the grids with valid POIs
	conn, db = connectMongo('tdnormal')
	try:
		GRIDSNUM = db['newgrids_%s' % city].count()
		gridsData, validIDs = getGridsFromMongo(city, db)
	finally:
		conn.close()

	# first district index of the city and its number of districts
	CITYDISIND, CITYDISNUM = getCityDisInfo(city)

	# run the tasks in 20 separate processes
	DATA = {
		'gridsData': gridsData,
		'validIDs': validIDs
	}
	jobs = []

	for x in xrange(0,20):
		PROP = {
			'INDEX': x,
			'DIRECTORY': directory,
			'GRIDSNUM': GRIDSNUM,
			'CITY': city,
			'CITYDISIND': CITYDISIND,
			'CITYDISNUM': CITYDISNUM,
			'FILENUM': number
		}

		job = Process(target=processTask, args=(PROP, DATA))
		job.start()
		jobs.append(job)

	# wait for all processes to finish
	for job in jobs:
		job.join()

	# merge the result files
	MERGE = time.time()
	print("Start merge at %s" % MERGE)
	mergeMatrixFiles(city, GRIDSNUM, directory)
	mergeDistributionFiles(city, directory)
	print("End merge in %s" % str(time.time() - MERGE))

	# done
	ENDTIME = time.time()
	print("End approach at %s" % ENDTIME)