def get_and_write_yeartraffic_into_database(self, mysql_pool: ConnectPool,
                                            city_pid: int,
                                            time_interval: datetime.timedelta):
    yeartraffic_instances = self.yeartraffic(city_pid, mysql_pool)
    # Check for None before taking len(); the original order raised TypeError on None.
    if not yeartraffic_instances:  # this request returned no data
        return
    for yeartraffic in yeartraffic_instances:
        region_id = yeartraffic.region_id
        date = yeartraffic.date
        index = yeartraffic.index
        sql_cmd = "insert into digitalsmart.yeartraffic(pid, tmp_date, rate) VALUE (%d,%d,%f)" % (
            region_id, date, index)
        mysql_pool.sumbit(sql_cmd)
    sql = "select tmp_date,rate from digitalsmart.yeartraffic where pid={pid} and tmp_date>=20190101;".format(
        pid=city_pid)
    yeartraffic_data = mysql_pool.select(sql)
    cache_yeartraffic_mapping = dict()
    for tmp_date, rate in yeartraffic_data:
        cache_yeartraffic_mapping[tmp_date] = rate
    if not cache_yeartraffic_mapping:
        return
    redis_key = "yeartraffic:{pid}".format(pid=city_pid)
    self._redis_worker.set(redis_key, json.dumps(cache_yeartraffic_mapping))
    self._redis_worker.expire(name=redis_key, time_interval=time_interval)
def manager_app_active_data(self):  # runs once a month
    """Fetch app user-profile data: gender ratio, age distribution, province coverage, and app user keywords."""
    mysql_pool = ConnectPool(max_workers=10, host=host, user=user,
                             password=password, port=port, database=database)
    app = AppUserhabit()
    sql = "select id,name from digitalsmart.appinfo"
    result = mysql_pool.select(sql)
    if not result:
        return None
    today = datetime.datetime.today()
    # On every run, mine the previous two months of data. Naive
    # `today.month - 2` underflows in January/February, so roll the year over.
    year, month = today.year, today.month - 2
    if month <= 0:
        month += 12
        year -= 1
    start_date = str(datetime.datetime(year, month, 1).date())  # "yyyy-mm-01"
    for pid, appname in result:
        obj = app.get_app_active_data(appname, pid, start_date)
        if obj is None:
            continue
        name = obj.app_name             # app name
        date = obj.date                 # date
        active_num = obj.active_num     # number of active users
        active_rate = obj.active_rate   # active-user rate
        rate_hight = obj.rate_hight     # industry benchmark
        rate_low = obj.rate_low         # industry average
        sql_cmd = "insert into digitalsmart.appactive(pid, ddate, activenum, activerate, base_activerate, aver_activerate) " \
                  "VALUE (%d,'%s',%d,%f,%f,%f)" % (pid, date, active_num,
                                                   active_rate, rate_hight, rate_low)
        mysql_pool.sumbit(sql_cmd)
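# A minimal sketch of the generic month arithmetic behind the fix above, using
# only the standard library; `months_ago` is a hypothetical helper, not part of
# this project:
def months_ago(d: datetime.date, n: int) -> datetime.date:
    """Return the first day of the month n months before d."""
    # Count months from zero so divmod maps cleanly back to a (year, month) pair.
    year, month0 = divmod(d.year * 12 + (d.month - 1) - n, 12)
    return datetime.date(year, month0 + 1, 1)

# Example: months_ago(datetime.date(2020, 1, 15), 2) == datetime.date(2019, 11, 1)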
def get_and_write_roadtraffic_into_database(self, mysql_pool: ConnectPool,
                                            city_pid: int, up_date: int,
                                            time_interval: datetime.timedelta):
    road_instances = self.road_manager(city_pid)  # fetch road data
    if road_instances is None:
        return
    for road in road_instances:
        region_id = road.region_id   # identifier
        roadname = road.roadname     # road name
        speed = road.speed           # speed
        direction = road.direction   # direction
        bounds = road.bounds         # latitude/longitude data set
        road_traffic_rate_list = road.road_index_data_list  # congestion-index series
        road_traffic_time_list = road.time_data_list
        num = road.num
        data_pack = json.dumps({
            "num": num,
            "time": road_traffic_time_list,
            "data": road_traffic_rate_list
        })
        rate = road.rate   # current congestion index
        roadid = road.num  # use the ranking as the road id
        sql_insert = "insert into digitalsmart.roadtraffic(pid, roadname, up_date, speed, direction, " \
                     "bound, data,roadid,rate) VALUE (%d,'%s',%d,%f,'%s','%s','%s',%d,%f) " % (
                         region_id, roadname, up_date, speed, direction,
                         bounds, data_pack, roadid, rate)
        mysql_pool.sumbit(sql_insert)
        sql_cmd = "update digitalsmart.roadmanager set up_date={0} where pid={1} and roadid={2}" \
            .format(up_date, region_id, roadid)
        mysql_pool.sumbit(sql_cmd)  # record the latest update time
        # Cache the data.
        redis_key = "road:{pid}:{road_id}".format(pid=city_pid, road_id=roadid)
        cache_road_data_mapping = {
            "pid": city_pid,
            "roadname": roadname,
            "up_date": up_date,
            "speed": speed,
            "direction": direction,
            "bounds": bounds,
            "data": data_pack,
            "roadpid": roadid,
            "rate": rate
        }
        self._redis_worker.set(redis_key, str(cache_road_data_mapping))
        self._redis_worker.expire(name=redis_key, time_interval=time_interval)
def filter_peopleflow(self, mysql_pool: ConnectPool,
                      positioning_instances: Iterator[Positioning],
                      ddate: int, peoplepid: int,
                      table_id: int) -> List[Positioning]:
    """
    Check whether some of the data already exists in the database; existing rows are not inserted again.
    :param mysql_pool: mysql connection pool
    :param positioning_instances: Positioning objects
    :param ddate: date
    :param peoplepid: database query condition
    :param table_id: table index
    :return: list(Positioning)
    """
    sql_cmd = "select ttime from digitalsmart.historyscenceflow{table_id} where pid={pid} and ddate={ddate} ".format(
        table_id=table_id, pid=peoplepid, ddate=ddate)
    mysql_positioning_data = mysql_pool.select(sql_cmd)  # rows that already exist in the database
    dic = {}
    for positioning in positioning_instances:  # build a dict keyed by time
        dic[positioning.detailTime] = positioning
    for positioning_data in mysql_positioning_data:  # discard entries that already exist
        positioning_data = str(positioning_data[0])
        # TIME values come back from the database as e.g. "0:00:00";
        # left-pad the hour to two digits to match the keys above.
        detailtime: str = positioning_data if len(positioning_data) == 8 else "0" + positioning_data
        try:
            dic.pop(detailtime)
        except KeyError:
            continue
    return list(dic.values())
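# The hour-padding dance above recurs in several methods below (daily traffic,
# scenic flow); a minimal standalone sketch of a shared helper, assuming TIME
# values always render as H:MM:SS or HH:MM:SS (`normalize_time` is a
# hypothetical name, not part of this project):
def normalize_time(value) -> str:
    """Render a MySQL TIME value as a zero-padded HH:MM:SS string."""
    text = str(value)
    return "0" + text if len(text) == 7 else text

# Example: normalize_time("0:00:00") == "00:00:00"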
def dealwith_daily_traffic(self, daily_traffic_instances: List[DayilTraffic],
                           pid: int, mysql_pool: ConnectPool, today: int,
                           yesterday: int) -> List[DayilTraffic]:
    """
    Deal with duplicate data.
    :param daily_traffic_instances: traffic-delay data objects
    :param pid: city id
    :param mysql_pool: mysql connection pool
    :param today: today's date, format yyyymmdd
    :param yesterday: yesterday's date, format yyyymmdd
    :return:
    """
    # Drop all of yesterday's data.
    daily_traffic_instances = list(daily_traffic_instances)
    for i in range(len(daily_traffic_instances)):
        if daily_traffic_instances[i].date > yesterday:
            daily_traffic_instances = daily_traffic_instances[i:]
            break
    # Below, filter out today's data that is already in the database.
    sql = "select ttime from digitalsmart.citytraffic where pid={0} and ddate={1} order by ttime".format(
        pid, today)
    data = mysql_pool.select(sql)
    if len(data):
        # Latest recorded time, normalized to HH:MM:SS.
        last = str(data[-1][0])
        ttime = last if len(last) == 8 else "0" + last
    else:
        ttime = "-1:00:00"  # no data recorded yet today
    # Drop today's duplicates.
    info = self.filter(daily_traffic_instances, ttime)
    return info
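# `self.filter` is defined elsewhere in this class; a minimal standalone sketch
# of the behavior assumed above (`_filter_after` is a hypothetical name).
# Lexicographic comparison works because the times are zero-padded HH:MM:SS
# strings and the "-1:00:00" sentinel sorts before any real time:
def _filter_after(instances: List[DayilTraffic], ttime: str) -> List[DayilTraffic]:
    """Keep only entries whose time of day is strictly later than ttime."""
    return [instance for instance in instances if instance.detailtime > ttime]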
def manager_city_year_traffic(self):
    time_interval = datetime.timedelta(days=1)
    mysql_pool = ConnectPool(max_workers=7, host=host, user=user,
                             password=password, port=port, database=database)
    sql = "select pid from digitalsmart.citymanager"
    thread_pool = ThreadPool(max_workers=6)
    data = mysql_pool.select(sql)
    for item in data:
        yearpid = item[0]
        thread_pool.submit(self.get_and_write_yeartraffic_into_database,
                           mysql_pool, yearpid, time_interval)
    thread_pool.run()
    thread_pool.close()
    mysql_pool.close()
    print("City yearly traffic data mining finished")
def get_and_write_dailytraffic_into_database(self, mysql_pool: ConnectPool,
                                             city_pid: int, cityname: str,
                                             time_interval: datetime.timedelta):
    """
    :param mysql_pool: connection-pool instance
    :param city_pid: city id
    :param cityname: city name
    :param time_interval: cache TTL
    :return:
    """
    dailytraffic_instances = self.get_city_traffic(citycode=city_pid)  # fetch traffic data
    if dailytraffic_instances is None:
        print("pid:%d -- city:%s has no data" % (city_pid, cityname))
        return
    now = time.time()  # current timestamp
    # Split into yesterday/today to make filtering easier.
    today = int(time.strftime('%Y%m%d', time.localtime(now)))
    yesterday = int(time.strftime('%Y%m%d', time.localtime(now - 3600 * 24)))
    dailytraffic_instances = list(dailytraffic_instances)
    # Cache the data.
    cache_traffic_data = dict()
    for dailytraffic in dailytraffic_instances:
        detailtime = dailytraffic.detailtime  # time of day
        index = dailytraffic.index            # congestion index
        cache_traffic_data[detailtime] = index
    redis_key = "traffic:{0}".format(city_pid)
    self._redis_worker.hashset(name=redis_key, mapping=cache_traffic_data)
    self._redis_worker.expire(name=redis_key, time_interval=time_interval)
    # Filter out yesterday's data and rows that already exist.
    filter_dailytraffic_instances = self.dealwith_daily_traffic(
        dailytraffic_instances, city_pid, mysql_pool, today, yesterday)
    # Write into mysql.
    for dailytraffic in filter_dailytraffic_instances:
        sql_cmd = "insert into digitalsmart.citytraffic(pid, ddate, ttime, rate)" \
                  " values('%d', '%d', '%s', '%f');" % (
                      city_pid, dailytraffic.date,
                      dailytraffic.detailtime, dailytraffic.index)
        mysql_pool.sumbit(sql_cmd)
def manager_city_traffic(self):
    """
    Fetch real-time city traffic congestion and write it into the database; runs every half hour.
    :return:
    """
    mysql_pool = ConnectPool(max_workers=7, host=host, user=user,
                             password=password, port=port, database=database)
    sql = "select pid, name from digitalsmart.citymanager"
    data = mysql_pool.select(sql)
    # Do not use the built-in thread pool here: it holds on to too much memory,
    # and the memory is never released when threads exit but keeps accumulating.
    # Use the custom thread pool instead.
    thread_pool = ThreadPool(max_workers=6)
    time_interval = datetime.timedelta(minutes=40)  # cache TTL
    for item in data:
        city_pid = item[0]
        city_name = item[1]
        thread_pool.submit(self.get_and_write_dailytraffic_into_database,
                           mysql_pool, city_pid, city_name, time_interval)
    thread_pool.run()
    thread_pool.close()
    mysql_pool.close()
    print("City traffic data mining finished")
def manager_city_road_traffic(self):
    """
    Fetch the current top-10 congested roads of each city; runs every 10 minutes.
    :return:
    """
    mysql_pool = ConnectPool(max_workers=7, host=host, user=user,
                             password=password, port=port, database=database)
    up_date = int(datetime.datetime.now().timestamp())  # record the latest update time
    sql = "select pid from digitalsmart.citymanager"
    data = mysql_pool.select(sql)  # pid collection
    time_interval = datetime.timedelta(minutes=60)  # cache TTL
    for item in data:
        # Better not to run these concurrently: each pid task already spawns 10
        # child threads, and adding concurrency here makes it easy to get banned.
        city_pid = item[0]
        self.get_and_write_roadtraffic_into_database(mysql_pool, city_pid,
                                                     up_date, time_interval)
    mysql_pool.close()
    print("City road traffic data mining finished")
def __dealwith_year_traffic(self, info: Iterator, pid: int,
                            mysql_pool: ConnectPool,
                            lastdate: int) -> List[YearTraffic]:
    sql = "select tmp_date from digitalsmart.yeartraffic where pid={0} and tmp_date>= {1} order by tmp_date".format(
        pid, lastdate)
    data = mysql_pool.select(sql)
    if len(data) == 0:
        return list(info)
    last_date: int = data[-1][0]  # most recent date (rows come back as 1-tuples)
    info = list(info)
    if len(info) == 0:  # the request failed
        return []
    # Keep only the entries after the most recent date already recorded; if that
    # date is not found in `info`, everything is treated as already recorded and
    # an empty list is returned.
    i = -1
    for i in range(len(info)):
        if info[i].date == last_date:
            break
    return info[i + 1:]
if __name__ == "__main__":
    p = ScenceFlow()
    pool = ConnectPool(max_workers=1)
    db = pool.work_queue.get()
    p.get_scence_situation(db, 1365, 555)
def manager_city_airstate(self):
    """
    Update air-quality data.
    :return:
    """
    semaphore = Semaphore(1)
    queue = Queue(1)  # used to signal that the update time can be written
    mysql_pool = ConnectPool(max_workers=7, host=host, user=user,
                             password=password, port=port, database=database)
    now = datetime.datetime.now()
    sql = "select pid,name from digitalsmart.citymanager"
    result = list(mysql_pool.select(sql))
    sql = "select lasttime from digitalsmart.airstate where flag=1"
    try:
        lasttime = mysql_pool.select(sql)[0][0]  # most recent update time
    except TypeError:
        lasttime = None
    weather = Weather()
    iter_citys_waether_objs = weather.get_city_weather_pid()  # fetch city weather ids
    city_map = dict()
    for obj in iter_citys_waether_objs:
        city_map[obj.city] = obj.aqi_pid
    thread_pool = ThreadPool(max_workers=6)
    count = len(result)  # task counter; at 0, signal that the time can be updated
    time_interval = datetime.timedelta(minutes=60)
    for item in result:
        citypid = item[0]
        city = item[1]
        city_weather_pid = city_map[city]

        def fast(pid, weather_pid):
            aqi_state = weather.last_air_state(weather_pid)  # request the data
            aqi = aqi_state.aqi
            pm2 = aqi_state.pm2
            pm10 = aqi_state.pm10
            so2 = aqi_state.so2
            no2 = aqi_state.no2
            co = aqi_state.co
            o3 = aqi_state.o3
            # Cache the data.
            redis_key = "air:{0}".format(pid)
            value = {"pid": pid, "aqi": aqi, "pm2": pm2, "pm10": pm10,
                     "co": co, "no2": no2, "o3": o3, "so2": so2,
                     "flag": 1, "lasttime": str(now)}
            self._redis_work.hashset(redis_key, value)
            self._redis_work.expire(name=redis_key, time_interval=time_interval)
            # Update the database.
            sql_cmd = "insert into digitalsmart.airstate(pid, aqi, pm2, pm10, co, no2, o3, so2, flag, lasttime) " \
                      "value (%d,%d,%d,%d,%f,%d,%d,%d,%d,'%s')" % (
                          pid, aqi, pm2, pm10, co, no2, o3, so2, 1, now)
            mysql_pool.sumbit(sql_cmd)
            semaphore.acquire()
            nonlocal count
            count -= 1  # count down
            semaphore.release()
            if count == 0:  # all tasks done; the update time may be written now
                queue.put(1)
            return

        thread_pool.submit(fast, citypid, city_weather_pid)
    thread_pool.run()
    thread_pool.close()
    # Update the time.
    if lasttime:
        sql = "update digitalsmart.airstate set flag=0 where lasttime='{0}'".format(lasttime)
        queue.get()
        mysql_pool.sumbit(sql)
    mysql_pool.close()
    print("Weather data written successfully")
class ManagerScence(PositioningPeople):
    """
    Cache data formats
    Scenic-area headcount: key = "scence:{0}:{1}".format(pid, type_flag), value = {"HH:MM:SS": num, ...}
    Crowd trend:           key = "trend:{pid}".format(pid=region_id), value = {"00:00:00": rate}
    Crowd distribution:    key = "distribution:{0}".format(pid),
                           value = str([{"lat": centerlat + item.latitude,
                                         "lng": centerlon + item.longitude,
                                         "count": item.number}, ...])
    """

    def __init__(self):
        self._redis_worker = RedisConnectPool(max_workers=7)

    def __del__(self):
        del self._redis_worker

    def manager_scence_situation(self):
        """ Scenic-area visitor-flow data management; runs every half hour. """
        # Data-source type: Baidu is 1, Tencent is 0.
        type_flag = 1
        # Connection pool.
        mysql_pool = ConnectPool(max_workers=6, host=host, user=user,
                                 password=password, port=port, database=database)
        # Scenic areas whose real-time visitor flow should be fetched.
        sql = "select ds.pid,dt.table_id from digitalsmart.scencemanager as ds inner join digitalsmart.tablemanager " \
              "as dt on ds.type_flag=dt.flag and ds.pid=dt.pid where ds.type_flag=1"
        iterator_pids = mysql_pool.select(sql)
        thread_pool = ThreadPool(max_workers=6)
        ddate: int = int(str(datetime.datetime.today().date()).replace("-", ''))
        time_interval = datetime.timedelta(minutes=33)  # cache TTL
        scence = ScenceFlow()
        # Start the requests.
        for pid, table_id in iterator_pids:
            def fast(area_id, table_index):
                """
                Request scenic-area visitor-flow data and write it to the database and cache.
                :param area_id: scenic-area id
                :param table_index: index of the table the area belongs to
                :return:
                """
                # Fetch the latest visitor-flow data.
                positioning_instances = scence.get_scence_situation(area_id)
                # Positioning data with duplicates filtered out.
                filter_positioning_instances = iter(
                    scence.filter_peopleflow(mysql_pool, positioning_instances,
                                             ddate, area_id, table_index))
                # Cache the positioning data.
                cache_data_mapping = dict()
                for positioning in positioning_instances:
                    cache_data_mapping[positioning.detailTime] = positioning.num
                redis_key = "scence:{0}:{1}".format(area_id, type_flag)
                self._redis_worker.hashset(name=redis_key, mapping=cache_data_mapping)
                self._redis_worker.expire(name=redis_key, time_interval=time_interval)
                # Pick the table to insert into.
                sql_format = "insert into digitalsmart.historyscenceflow{table_index} (pid, ddate, ttime, num) " \
                             "values ('%d','%d','%s','%d')".format(table_index=table_index)
                # Insert into the mysql database.
                for positioning in filter_positioning_instances:
                    sql_cmd = sql_format % (positioning.region_id, positioning.date,
                                            positioning.detailTime, positioning.num)
                    mysql_pool.sumbit(sql_cmd)

            thread_pool.submit(fast, pid, table_id)
        thread_pool.run()
        thread_pool.close()
        mysql_pool.close()
        print("Baidu-sourced scenic-area data mining finished")

    def manager_scence_trend(self):
        """
        Regional crowd-trend data management; runs every 5 minutes.
        :return:
        """
        date_today = datetime.datetime.today()  # today
        time_inv = datetime.timedelta(days=1)   # interval
        date_tomorrow = date_today + time_inv   # tomorrow
        str_start = str(date_today.date())      # start date (today)
        str_end = str(date_tomorrow.date())     # end date
        now_time = datetime.datetime.now()      # current time
        hour = now_time.hour
        minuties = now_time.minute
        minuties = minuties - minuties % 5      # round minutes down to a multiple of 5
        # The line separating actual data from predicted data.
        dividing_time_line = str(datetime.datetime(2019, 1, 1, hour, minuties, 0).time())
        # mysql connection pool.
        mysql_pool = ConnectPool(max_workers=6, host=host, user=user,
                                 password=password, port=port, database=database)
        # Scenic areas whose visitor-flow trend should be fetched.
        sql = "select pid,area from digitalsmart.scencemanager where type_flag=0 "
        scence_info_data: Tuple = mysql_pool.select(sql)
        thread_pool = ThreadPool(max_workers=6)
        time_interval = datetime.timedelta(minutes=33)  # cache TTL
        pos_trend = PositioningTrend()
        predict = True  # whether to fetch predicted data
        for item in scence_info_data:
            pid = item[0]   # unique scenic-area id
            area = item[1]  # scenic-area name

            def fast(region_id, place):
                """
                Request the scenic-area visitor-flow trend and write it to the database and cache.
                :param region_id: scenic-area id
                :param place: scenic-area name
                :return:
                """
                sql_cmd = "select ttime from digitalsmart.scencetrend where pid={0} and ddate='{1}' " \
                          "order by ttime".format(region_id, int(str_start.replace("-", "")))
                # Most recent update time for this area; "-1:00:00" means there
                # is no data at all for today yet.
                last_ttime: str = "-1:00:00"
                try:
                    # Query the most recently recorded time.
                    table_last_time = mysql_pool.select(sql_cmd)[-1][0]
                    last_ttime = str(table_last_time)
                except IndexError:
                    pass
                trend_instances = pos_trend.get_place_index(
                    name=place, placeid=region_id, date_start=str_start,
                    date_end=str_end, predict=predict)  # fetch trend data
                cache_data_mapping = dict()  # data to cache
                # Insert and cache: only actual data goes into the database;
                # actual plus predicted data goes into memory.
                for trend in trend_instances:
                    ttime = trend.detailtime  # time of day
                    rate = trend.index        # index at that time
                    cache_data_mapping[ttime] = rate
                    if ttime <= last_ttime or ttime > dividing_time_line:
                        # Skip data at or before the last recorded time,
                        # and predicted data.
                        continue
                    region_id = trend.region_id  # scenic-area id
                    ddate = trend.ddate          # current date
                    sql_cmd = "insert into digitalsmart.scencetrend(pid, ddate, ttime, rate) VALUE(%d,%d,'%s',%f)" % (
                        region_id, ddate, ttime, rate)
                    mysql_pool.sumbit(sql_cmd)  # write into the database
                if not cache_data_mapping:
                    return
                # Cache key and data.
                redis_key = "trend:{pid}".format(pid=region_id)
                self._redis_worker.hashset(name=redis_key, mapping=cache_data_mapping)
                self._redis_worker.expire(name=redis_key, time_interval=time_interval)

            thread_pool.submit(fast, pid, area)
        thread_pool.run()
        thread_pool.close()
        mysql_pool.close()
        print("Scenic-area trend mining finished")

    def manager_scenece_people(self):
        """
        Update the crowd data for the current moment.
        :return:
        """
        self.mysql_pool = ConnectPool(max_workers=8, host=host, user=user,
                                      password=password, port=port,
                                      database=database)  # mysql connection pool
        up_date = int(datetime.datetime.now().timestamp())  # current timestamp
        # Scenic areas to query.
        sql: str = "select pid,latitude,longitude from digitalsmart.scencemanager where type_flag=0"
        data = self.mysql_pool.select(sql)
        date_today = datetime.datetime(2019, 10, 8, 1, 1, 0)  # "today" (note: hard-coded here)
        ddate = str(date_today.date())
        tmp_date = date_today.timestamp()  # today's timestamp
        if date_today.time().minute % 5 > 0:
            # Align the time: minutes must be a multiple of 5.
            now_time = date_today.time()
            minute = now_time.minute - now_time.minute % 5
            detailtime = "{0:0>2}:{1:0>2}:00".format(now_time.hour, minute)
        else:
            detailtime = time.strftime("%H:%M:00", time.localtime(tmp_date))
        thread_pool = ThreadPool(max_workers=6)
        pos = PositioningSituation()
        for info in data:  # update the data
            def fast(item):
                """
                Request the crowd data and write it to the database and cache.
                :param item: scenic-area info row
                :return:
                """
                region_id = item[0]  # scenic-area id
                float_lat = item[1]  # latitude
                float_lon = item[2]  # longitude
                # Determine which table the data should be inserted into.
                sql_cmd = "select table_id from digitalsmart.tablemanager where pid={0}".format(region_id)
                table_id = self.mysql_pool.select(sql_cmd)[0][0]
                last_positioning_data = pos.get_scenece_people_json_data(
                    date=ddate, dateTime=detailtime,
                    region_id=region_id)  # request the latest distribution data
                if not last_positioning_data:  # the request failed
                    return
                # Update the crowd-distribution data.
                self.manager_scenece_people_distribution(
                    last_positioning_data, region_id, up_date,
                    float_lat, float_lon, table_id)
                self.manager_scenece_people_situation(table_id, last_positioning_data,
                                                      region_id, ddate, detailtime)

            thread_pool.submit(fast, info)
        thread_pool.run()
        thread_pool.close()
        self.mysql_pool.close()
        print("Scenic-area crowd data mining finished")

    def manager_scenece_people_distribution(self, scence_people_data: dict,
                                            region_id, tmp_date: int,
                                            centerlat: float, centerlon: float,
                                            table_id: int):
        """
        Update regional crowd-distribution data; a few thousand rows are inserted per run.
        :param scence_people_data: crowd data package
        :param region_id: scenic-area id
        :param tmp_date: timestamp
        :param centerlat: center latitude
        :param centerlon: center longitude
        :param table_id: table index
        :return:
        """
        # Entries like {',': 0} in scence_people_data are abnormal data.
        if len(scence_people_data.keys()) <= 1:
            return
        pos = PositioningSituation()
        geographi_instances = pos.get_scence_distribution_situation(
            scence_people_data)  # iterator of (latitude, longitude, count) structs
        select_table: str = "insert into digitalsmart.peopleposition{table_id} (pid, tmp_date, lat, lon, num) VALUES" \
            .format(table_id=table_id)  # pick the table
        geographi_instances = list(geographi_instances)
        # Generator of rows to insert into the database.
        insert_mysql_geographi_datapack = self._generator_of_mysql_insert_data(
            geographi_instances, region_id, tmp_date, centerlat, centerlon)
        # Generator of the data package to cache.
        redis_data = self._generator_of_redis_insert_data(
            geographi_instances, centerlat, centerlon)
        time_interval = datetime.timedelta(minutes=60)  # cache TTL
        redis_key = "distribution:{0}".format(region_id)  # cache key
        # Cache the data.
        try:
            value = json.dumps(list(redis_data))
        except Exception as e:
            print("list serialization failed", e, region_id)
            return
        self._redis_worker.set(name=redis_key, value=value)
        self._redis_worker.expire(name=redis_key, time_interval=time_interval)
        # Submitting row by row spends most of the time on log generation and
        # burns too much IO; concatenate the rows and write them together so the
        # log is hit only once. Do not concatenate too much at once, though: the
        # packet size is limited, so Max_allowed_packet must be raised in MySQL
        # or the statement will fail.
        count = 0                # row counter
        geographi_data = list()  # rows pending insertion
        for geographi in insert_mysql_geographi_datapack:
            count += 1
            geographi_data.append(geographi)
            if count % 3000 == 0:
                sql_value = ','.join(geographi_data)
                sql = select_table + sql_value
                self.mysql_pool.sumbit(sql)  # submit the batch
                geographi_data.clear()
        if len(geographi_data) > 0:
            sql_value = ','.join(geographi_data)
            sql = select_table + sql_value
            self.mysql_pool.sumbit(sql)
        # Update the modification time in the distribution-manager table.
        sql = "update digitalsmart.tablemanager " \
              "set last_date={0} where pid={1};".format(tmp_date, region_id)
        self.mysql_pool.sumbit(sql)

    def manager_scenece_people_situation(self, table_id: int,
                                         scence_people_data: dict, pid: int,
                                         ddate: str, ttime: str):
        """
        Update regional crowd-count data; only a single row is inserted per run.
        :param table_id: table index
        :param scence_people_data: crowd data package
        :param pid: scenic-area id
        :param ddate: date, format yyyy-mm-dd
        :param ttime: time, format HH:MM:SS
        :return:
        """
        type_flag = 0  # data-source type: Baidu is 1, Tencent is 0
        pos = PositioningSituation()
        positioning_instance = pos.get_scence_people_count(scence_people_data,
                                                           ddate, ttime, pid)
        sql_format = "insert into digitalsmart.historyscenceflow{table_id}(pid, ddate, ttime, num) values (%d,%d,'%s',%d) ".format(
            table_id=table_id)
        sql = sql_format % (positioning_instance.region_id, positioning_instance.date,
                            positioning_instance.detailTime, positioning_instance.num)
        self.mysql_pool.sumbit(sql)  # insert into the mysql database
        format_int_of_ddate = int(ddate.replace("-", ''))
        sql = "select ttime,num from digitalsmart.historyscenceflow{table_id} where ddate={ddate};".format(
            table_id=table_id, ddate=format_int_of_ddate)
        people_flow_data = self.mysql_pool.select(sql)
        # Data to cache.
        cache_data_mapping = dict()
        for people_flow in people_flow_data:
            ttime = str(people_flow[0])  # time
            match_ttime_format = re.match(r"(\d+):(\d+):(\d+)", ttime)
            hour = match_ttime_format.group(1)
            minutiue = match_ttime_format.group(2)
            second = match_ttime_format.group(3)
            if len(hour) == 1:
                hour = "0" + hour
            ttime = "{0}:{1}:{2}".format(hour, minutiue, second)  # normalize to HH:MM:SS
            num = people_flow[1]  # visitor count
            cache_data_mapping[ttime] = num
        if not cache_data_mapping:
            return
        time_interval = datetime.timedelta(minutes=60)  # cache TTL
        redis_key = "scence:{0}:{1}".format(pid, type_flag)
        self._redis_worker.hashset(name=redis_key, mapping=cache_data_mapping)
        self._redis_worker.expire(name=redis_key, time_interval=time_interval)

    @staticmethod
    def _generator_of_mysql_insert_data(source_data: list, region_id: int,
                                        tmp_date: int, centerlat: float,
                                        centerlon: float) -> Iterator[str]:
        """
        Generator producing the scenic-area crowd-distribution rows to insert into the database.
        :param source_data: object data package
        :param region_id: scenic-area id
        :param tmp_date: timestamp
        :param centerlat: center latitude
        :param centerlon: center longitude
        :return:
        """
        for instance in source_data:
            yield str((region_id, tmp_date, centerlat + instance.latitude,
                       centerlon + instance.longitude, instance.number))

    @staticmethod
    def _generator_of_redis_insert_data(source_data: list, centerlat: float,
                                        centerlon: float):
        """
        Generator producing the rows to cache in redis.
        :param source_data: object data package
        :param centerlat: center latitude
        :param centerlon: center longitude
        :return:
        """
        for instance in source_data:
            yield {
                "lat": centerlat + instance.latitude,
                "lng": centerlon + instance.longitude,
                "count": instance.number
            }
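# A sketch of how these managers might be driven, assuming an external
# cron-like scheduler (not shown in this file); the cadences come from the
# method docstrings above:
#
#     manager = ManagerScence()
#     manager.manager_scence_situation()  # every half hour
#     manager.manager_scence_trend()      # every 5 minutes
#     manager.manager_scenece_people()    # each positioning round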
class CompleteDataInterface:
    """
    When accessing redis resources through the redis connection pool, the pool
    size must be greater than or equal to the concurrency level (with both below
    the maximum number of connections redis supports); otherwise the excess
    concurrent workers get errors because no redis resource can be allocated to
    them.
    """

    def __init__(self):
        self.redis_worke = RedisConnectPool(max_workers=10)
        self.mysql_worke = ConnectPool(max_workers=1, host=host, user=user,
                                       password=password, port=port,
                                       database=database)

    def __del__(self):
        self.mysql_worke.close()
        del self.redis_worke
        del self.mysql_worke

    @staticmethod
    def check_keys_complete(source_keys: list, target_keys: list) -> List:
        """
        Check the completeness of the cache keys.
        :param source_keys: the complete keys
        :param target_keys: the keys to check
        :return:
        """
        if len(source_keys) == len(target_keys):  # check for an exact match
            for key in target_keys:
                result = source_keys.count(key)
                if result != 1:  # found an incorrect key
                    return source_keys
            return target_keys
        else:
            return source_keys

    @staticmethod
    def check_data_complete(source_data: list,
                            time_interval: datetime.timedelta) -> bool:
        """
        Check the completeness of the data.
        :param source_data: the time series of the redis cache, format HH:MM:SS
        :param time_interval: interval of the time series
        :return: True if the data is complete
        """
        if len(source_data) == 0:
            return False
        now_time = datetime.datetime.now()
        temp_collection = list()
        for item in source_data:
            temp_collection.append(item.decode())
        source_data = temp_collection
        max_time = max(source_data)  # latest time
        last_time = time.strptime(max_time, "%H:%M:%S")
        redis_last_time = datetime.datetime(now_time.year, now_time.month,
                                            now_time.day, last_time.tm_hour,
                                            last_time.tm_min, last_time.tm_sec)
        init_time = datetime.datetime(now_time.year, now_time.month,
                                      now_time.day, 0, 0, 0)  # start time of the data
        # Expected length of a complete series.
        standard_length = int((redis_last_time.timestamp() - init_time.timestamp())
                              / time_interval.seconds) + 1
        return len(source_data) >= standard_length

    def get_mysql_complete_keys(self, sql, regular) -> Iterator:
        """
        Combine mysql data into the complete set of cache keys.
        :param sql: mysql query statement
        :param regular: key template
        :return: keys
        """
        iter_pids = self.mysql_worke.select(sql)
        for pid in iter_pids:
            yield regular % pid

    def get_complete_keys(self, sql: str, complete_keys_regular: str,
                          search_regular: str) -> List:
        """
        Compare the keys built from mysql with the keys cached in redis and
        produce the most complete set of keys.
        :param sql: query statement
        :param complete_keys_regular: template for keys built from mysql
        :param search_regular: template for keys cached in redis
        :return:
        """
        # The complete keys built from mysql.
        source_complete_keys = self.get_mysql_complete_keys(sql, complete_keys_regular)
        # Scan the redis keys.
        keys = self.redis_worke.get_keys(search_regular)
        target_keys = list()
        for key in keys:  # decode
            target_keys.append(key.decode())
        # Check the keys and keep the most complete set.
        comple_keys = self.check_keys_complete(list(source_complete_keys), target_keys)
        return comple_keys

    def time_difference(self, now_time: datetime.datetime,
                        complete_keys: list) -> int:
        """
        Get the longest lag between now and the timestamps of the cached data.
        :param now_time: the current time
        :param complete_keys: the cached redis keys
        :return: time difference in seconds
        """
        time_list = list()  # latest time found under each cached key
        for key in complete_keys:
            redis_data = self.redis_worke.hash_get_all(key)
            if not redis_data:
                # Any empty entry forces a recheck.
                return 1000000
            max_time = max(redis_data.keys()).decode()
            time_list.append(max_time)
        min_time = min(time_list)
        last_time = time.strptime(min_time, "%H:%M:%S")
        redis_last_time = datetime.datetime(now_time.year, now_time.month,
                                            now_time.day, last_time.tm_hour,
                                            last_time.tm_min, last_time.tm_sec)
        time_inv = now_time.timestamp() - redis_last_time.timestamp()  # difference
        return int(time_inv)
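# A sketch of how the completeness check might be driven; the sql string and
# key templates below are illustrative assumptions, not taken from this file
# (the "traffic:{pid}" key format matches the city-traffic cache above):
#
#     interface = CompleteDataInterface()
#     keys = interface.get_complete_keys(
#         sql="select pid from digitalsmart.citymanager",
#         complete_keys_regular="traffic:%d",
#         search_regular="traffic:*")
#     lag = interface.time_difference(datetime.datetime.now(), keys)
#     if lag > 40 * 60:  # older than the 40-minute cache TTL, refresh needed
#         ...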