Beispiel #1
0
    def get_and_write_yeartraffic_into_database(
            self, mysql_pool: ConnectPool, city_pid: int,
            time_interval: datetime.timedelta):
        """Fetch year-traffic records for one city, persist them to MySQL
        and cache this year's {date: rate} mapping in Redis.

        :param mysql_pool: MySQL connection pool
        :param city_pid: city identifier
        :param time_interval: Redis cache expiry
        """
        yeartraffic_instances = self.yeartraffic(city_pid, mysql_pool)

        # BUG FIX: the original evaluated len(...) BEFORE the None check,
        # raising TypeError whenever the request returned None.
        if not yeartraffic_instances:  # no data from this request
            return

        for yeartraffic in yeartraffic_instances:
            region_id = yeartraffic.region_id
            date = yeartraffic.date
            index = yeartraffic.index
            sql_cmd = "insert into digitalsmart.yeartraffic(pid, tmp_date, rate) VALUE (%d,%d,%f)" % (
                region_id, date, index)
            mysql_pool.sumbit(sql_cmd)
        sql = "select tmp_date,rate from digitalsmart.yeartraffic where pid={pid} and tmp_date>=20190101;".format(
            pid=city_pid)
        yeartraffic_data = mysql_pool.select(sql)
        # Build the Redis payload: {tmp_date: rate}.
        cache_yeartraffic_mapping = {tmp_date: rate
                                     for tmp_date, rate in yeartraffic_data}
        if not cache_yeartraffic_mapping:
            return
        redis_key = "yeartraffic:{pid}".format(pid=city_pid)
        self._redis_worker.set(redis_key,
                               json.dumps(cache_yeartraffic_mapping))
        self._redis_worker.expire(name=redis_key, time_interval=time_interval)
Beispiel #2
0
    def manager_app_active_data(self):  # runs once a month
        """Collect app user-profile metrics (gender ratio, age distribution,
        province coverage, user keywords) and persist the activity numbers
        into digitalsmart.appactive."""
        mysql_pool = ConnectPool(max_workers=10,
                                 host=host,
                                 user=user,
                                 password=password,
                                 port=port,
                                 database=database)
        app = AppUserhabit()
        sql = "select id,name from digitalsmart.appinfo"

        result = mysql_pool.select(sql)
        if not result:
            return None
        today = datetime.datetime.today()
        # Every run mines the data of two months ago.
        # BUG FIX: `today.month - 2` is 0 or -1 in January/February, which
        # makes datetime() raise ValueError -- roll the year over instead.
        year, month = today.year, today.month - 2
        if month <= 0:
            month += 12
            year -= 1
        start_date = str(datetime.datetime(year, month, 1).date())  # "yyyy-mm-01"
        for pid, appname in result:
            obj = app.get_app_active_data(appname, pid, start_date)
            if obj is None:
                continue
            date = obj.date  # reporting date
            active_num = obj.active_num  # number of active users
            active_rate = obj.active_rate  # active-user rate
            rate_hight = obj.rate_hight  # industry benchmark
            rate_low = obj.rate_low  # industry average
            sql_cmd = "insert into digitalsmart.appactive(pid, ddate, activenum, activerate, base_activerate, aver_activerate) " \
                      "VALUE (%d,'%s',%d,%f,%f,%f)" % (pid, date, active_num, active_rate, rate_hight, rate_low)
            mysql_pool.sumbit(sql_cmd)
Beispiel #3
0
    def get_and_write_roadtraffic_into_database(
            self, mysql_pool: ConnectPool, city_pid: int, up_date: int,
            time_interval: datetime.timedelta):
        """Fetch the city's congested-road ranking, insert each road into
        MySQL, bump its roadmanager timestamp, and cache a per-road
        snapshot in Redis.

        :param mysql_pool: MySQL connection pool
        :param city_pid: city identifier
        :param up_date: update timestamp recorded with each row
        :param time_interval: Redis cache expiry
        """
        road_instances = self.road_manager(city_pid)  # fetch road data
        if road_instances is None:
            return
        for road in road_instances:
            region_id = road.region_id  # identifier
            roadname = road.roadname  # road name
            speed = road.speed  # speed
            direction = road.direction  # direction
            bounds = road.bounds  # latitude/longitude data set
            road_traffic_rate_list = road.road_index_data_list  # congestion-index series
            road_traffic_time_list = road.time_data_list
            num = road.num
            data_pack = json.dumps({
                "num": num,
                "time": road_traffic_time_list,
                "data": road_traffic_rate_list
            })
            rate = road.rate  # current congestion index
            roadid = road.num  # the rank doubles as the road id
            sql_insert = "insert into digitalsmart.roadtraffic(pid, roadname, up_date, speed, direction, " \
                         "bound, data,roadid,rate) VALUE (%d,'%s',%d,%f,'%s','%s','%s',%d,%f) " % (
                             region_id, roadname, up_date, speed, direction, bounds,
                             data_pack, roadid, rate)
            mysql_pool.sumbit(sql_insert)
            sql_cmd = "update  digitalsmart.roadmanager set up_date={0}  where pid={1} and roadid={2}" \
                .format(up_date, region_id, roadid)

            mysql_pool.sumbit(sql_cmd)  # record the latest update time
            # Cache the snapshot.
            # if data is None or bounds is None:  # skip when the congestion payload request failed
            #     continue
            redis_key = "road:{pid}:{road_id}".format(pid=city_pid,
                                                      road_id=roadid)
            cache_road_data_mapping = {
                "pid": city_pid,
                "roadname": roadname,
                "up_date": up_date,
                "speed": speed,
                "direction": direction,
                "bounds": bounds,
                "data": data_pack,
                "roadpid": roadid,
                "rate": rate
            }
            # NOTE(review): str(dict) stores a Python-repr string, unlike
            # the json.dumps used by the sibling caching code -- confirm
            # the reader expects repr before unifying.
            self._redis_worker.set(redis_key, str(cache_road_data_mapping))
            self._redis_worker.expire(name=redis_key,
                                      time_interval=time_interval)
Beispiel #4
0
    def filter_peopleflow(self, mysql_pool: ConnectPool, positioning_instances: Iterator[Positioning], ddate: int,
                          peoplepid: int,
                          table_id: int) -> \
            List[Positioning]:
        """
        Drop Positioning records whose time already exists in the database.

        :param mysql_pool: MySQL connection pool
        :param positioning_instances: iterable of Positioning records
        :param ddate: date, yyyymmdd
        :param peoplepid: scenic-area id used in the query
        :param table_id: suffix of the historyscenceflow table to query
        :return: list of Positioning records not yet stored
        """
        sql_cmd = "select ttime from digitalsmart.historyscenceflow{table_id} where pid={pid} and  ddate={ddate} ".format(
            table_id=table_id, pid=peoplepid, ddate=ddate)

        mysql_positioning_data = mysql_pool.select(sql_cmd)  # rows already in the database
        # Index the candidate records by their detail time.
        dic = {positioning.detailTime: positioning
               for positioning in positioning_instances}

        for positioning_data in mysql_positioning_data:  # evict already-stored entries
            positioning_data = str(positioning_data[0])
            # MySQL returns times like "0:00:00"; left-pad to "00:00:00"
            # so they match the detailTime format.
            detailtime: str = positioning_data if len(
                positioning_data) == 8 else "0" + positioning_data
            dic.pop(detailtime, None)  # ignore times we never held
        return list(dic.values())
Beispiel #5
0
    def dealwith_daily_traffic(self,
                               daily_traffic_instances: List[DayilTraffic],
                               pid: int, mysql_pool: ConnectPool, today: int,
                               yesterday: int) -> List[DayilTraffic]:
        """
        Remove duplicate traffic samples.

        :param daily_traffic_instances: traffic-delay records
        :param pid: city id
        :param mysql_pool: MySQL connection pool
        :param today: today's date, yyyymmdd
        :param yesterday: yesterday's date, yyyymmdd
        :return: records newer than what is already stored
        """
        # Drop every record dated yesterday or earlier.
        # BUG FIX: the original sliced from the first record newer than
        # yesterday and kept EVERYTHING when no such record existed;
        # filtering handles that case as the comment intends.
        daily_traffic_instances = [
            instance for instance in daily_traffic_instances
            if instance.date > yesterday
        ]

        # Filter out today's samples that are already in the database.
        sql = "select ttime from  digitalsmart.citytraffic where pid={0} and ddate={1} order by ttime".format(
            pid, today)
        data = mysql_pool.select(sql)

        if len(data):
            last = str(data[-1][0])
            # MySQL returns "0:00:00"-style times; left-pad to "00:00:00".
            ttime = last if len(last) == 8 else "0" + last
        else:
            ttime = "-1:00:00"  # nothing recorded today yet
        # Drop today's duplicates.
        info = self.filter(daily_traffic_instances, ttime)

        return info
    def manager_city_year_traffic(self):
        """Kick off year-traffic mining for every managed city.

        One pooled task per city pid; each task writes MySQL and refreshes
        the Redis cache (see get_and_write_yeartraffic_into_database).
        """
        pool = ConnectPool(max_workers=7, host=host, user=user, password=password, port=port,
                           database=database)
        workers = ThreadPool(max_workers=6)
        cache_ttl = datetime.timedelta(days=1)
        for (yearpid,) in pool.select("select pid from digitalsmart.citymanager"):
            workers.submit(self.get_and_write_yeartraffic_into_database, pool, yearpid, cache_ttl)
        workers.run()
        workers.close()
        pool.close()
        print("城市季度交通数据挖掘完毕")
Beispiel #7
0
    def get_and_write_dailytraffic_into_database(
            self, mysql_pool: ConnectPool, city_pid: int, cityname: str,
            time_interval: datetime.timedelta):
        """Fetch a city's realtime congestion curve, cache it in Redis, and
        append the not-yet-stored samples to MySQL.

        :param mysql_pool: connection-pool instance
        :param city_pid: city id
        :param cityname: city name
        :param time_interval: Redis cache lifetime
        :return:
        """
        traffic_records = self.get_city_traffic(citycode=city_pid)  # fetch traffic data
        if traffic_records is None:
            print("pid:%d -- city:%s 没有数据" % (city_pid, cityname))
            return

        # Split the timeline into yesterday/today so duplicates can be filtered.
        now = time.time()
        today = int(time.strftime('%Y%m%d', time.localtime(now)))
        yesterday = int(time.strftime('%Y%m%d', time.localtime(now - 3600 * 24)))
        traffic_records = list(traffic_records)

        # Cache {detail time -> congestion index} in Redis.
        cache_traffic_data = {record.detailtime: record.index
                              for record in traffic_records}
        redis_key = "traffic:{0}".format(city_pid)
        self._redis_worker.hashset(name=redis_key, mapping=cache_traffic_data)
        self._redis_worker.expire(name=redis_key, time_interval=time_interval)

        # Drop yesterday's samples and those already stored.
        fresh_records = self.dealwith_daily_traffic(
            traffic_records, city_pid, mysql_pool, today, yesterday)

        # Persist whatever is left.
        for record in fresh_records:
            sql_cmd = "insert into  digitalsmart.citytraffic(pid, ddate, ttime, rate)" \
                      " values('%d', '%d', '%s', '%f');" % (
                          city_pid, record.date, record.detailtime, record.index)
            mysql_pool.sumbit(sql_cmd)
    def manager_city_traffic(self):
        """
        Fetch realtime congestion for every city and write it to the
        database; runs every half hour.
        :return:
        """
        pool = ConnectPool(max_workers=7, host=host, user=user, password=password, port=port,
                           database=database)
        rows = pool.select("select pid, name from digitalsmart.citymanager")
        # Avoid the stdlib thread pool here -- it held on to memory after
        # workers exited; the custom ThreadPool releases it.
        workers = ThreadPool(max_workers=6)
        cache_ttl = datetime.timedelta(minutes=40)  # cache lifetime
        for city_pid, city_name in rows:
            workers.submit(self.get_and_write_dailytraffic_into_database, pool, city_pid, city_name,
                           cache_ttl)
        workers.run()
        workers.close()
        pool.close()
        print("城市交通数据挖掘完毕")
    def manager_city_road_traffic(self):
        """
        Fetch each city's realtime top-10 congested roads; runs every 10
        minutes.
        :return:
        """
        pool = ConnectPool(max_workers=7, host=host, user=user, password=password, port=port,
                           database=database)
        up_date = int(datetime.datetime.now().timestamp())  # latest update timestamp
        rows = pool.select("select pid from digitalsmart.citymanager")  # pid rows
        cache_ttl = datetime.timedelta(minutes=60)  # cache lifetime
        # Deliberately sequential: each pid already fans out into ~10 child
        # threads, so adding concurrency here risks getting banned.
        for (city_pid,) in rows:
            self.get_and_write_roadtraffic_into_database(pool, city_pid, up_date, cache_ttl)
        pool.close()
        print("城市道路交通数据挖掘完毕")
Beispiel #10
0
    def __dealwith_year_traffic(self, info: Iterator, pid: int,
                                mysql_pool: ConnectPool,
                                lastdate: int) -> List[YearTraffic]:
        """Return only the records newer than the latest stored date.

        :param info: iterator of YearTraffic records, assumed date-ascending
        :param pid: city id
        :param mysql_pool: MySQL connection pool
        :param lastdate: earliest date worth considering, yyyymmdd
        :return: records not yet present in digitalsmart.yeartraffic
        """
        sql = "select tmp_date from digitalsmart.yeartraffic where pid={0} and tmp_date>= {1} order by tmp_date".format(
            pid, lastdate)
        data = mysql_pool.select(sql)

        info = list(info)
        if len(info) == 0:  # the upstream request failed
            return []
        if len(data) == 0:
            return info
        # BUG FIX: rows are 1-tuples -- the original took data[-1] (a tuple)
        # and compared it against integer dates, which never matched and
        # silently discarded every record.
        last_date: int = data[-1][0]  # most recent stored date

        # Keep records strictly newer than the stored ones.  This also
        # fixes the original fallthrough that dropped everything when no
        # date compared equal.
        return [record for record in info if record.date > last_date]
Beispiel #11
0
#         data = cursor.fetchall()
#
#         if len(data) == 0:
#             cursor.close()
#             return info
#         lis = []
#
#         for item in info:
#             if item['date'] != date:
#                 continue
#             lis.append(dict(zip(item.values(), item.keys())))
#         info = lis
#         for olddata in data:
#             self.filter(info, olddata[0], olddata[1])
#         lis = []
#         for item in info:
#             lis.append(dict(zip(item.values(), item.keys())))
#         info = lis
#         return info

if __name__ == "__main__":
    # Ad-hoc smoke test: fetch scenic-area situation data for one area.
    p = ScenceFlow()
    pool = ConnectPool(max_workers=1)
    # Borrow a raw connection straight off the pool's work queue.
    db = pool.work_queue.get()
    p.get_scence_situation(db, 1365, 555)
    # db = pymysql.connect(host=host, user=user, password=password, database=scencedatabase,
    #                      port=port)
    # data = p.get_scence_situation(db, 1174)
    # #
    # p.write_scence_situation(db, data)
 def __init__(self):
     self.redis_worke = RedisConnectPool(max_workers=10)
     self.mysql_worke = ConnectPool(max_workers=1, host=host, user=user, password=password, port=port,
                                    database=database)
    def manager_city_airstate(self):
        """
        Refresh the air-quality data for every managed city.

        Fans per-city work out over a thread pool; a semaphore-guarded
        counter plus a one-slot queue signal when all tasks have finished
        so the previous "latest" rows can be demoted (flag=0).
        :return:
        """
        semaphore = Semaphore(1)
        queue = Queue(1)  # signals that the update time may be written
        mysql_pool = ConnectPool(max_workers=7, host=host, user=user, password=password, port=port,
                                 database=database)
        now = datetime.datetime.now()

        sql = "select pid,name from digitalsmart.citymanager"
        result = list(mysql_pool.select(sql))
        sql = "select lasttime from digitalsmart.airstate where flag=1"
        try:
            lasttime = mysql_pool.select(sql)[0][0]  # most recent update time
        except TypeError:
            lasttime = None
        weather = Weather()
        iter_citys_waether_objs = weather.get_city_weather_pid()  # city weather ids
        city_map = dict()
        for obj in iter_citys_waether_objs:
            city_map[obj.city] = obj.aqi_pid
        thread_pool = ThreadPool(max_workers=6)
        count = len(result)  # task counter; 0 means all tasks finished
        time_interval = datetime.timedelta(minutes=60)
        for item in result:
            citypid = item[0]
            city = item[1]
            city_weather_pid = city_map[city]

            def fast(pid, weather_pid):
                # Fetch one city's current air-quality reading.
                aqi_state = weather.last_air_state(weather_pid)

                aqi = aqi_state.aqi
                pm2 = aqi_state.pm2
                pm10 = aqi_state.pm10
                so2 = aqi_state.so2
                no2 = aqi_state.no2
                co = aqi_state.co
                o3 = aqi_state.o3
                # Cache the reading in Redis.
                # NOTE(review): this uses self._redis_work while sibling
                # methods use self._redis_worker -- confirm which attribute
                # the enclosing class actually defines.
                redis_key = "air:{0}".format(pid)
                value = {"pid": pid, "aqi": aqi, "pm2": pm2, "pm10": pm10, "co": co, "no2": no2, "o3": o3, "so2": so2,
                         "flag": 1, "lasttime": str(now)}
                self._redis_work.hashset(redis_key, value)
                self._redis_work.expire(name=redis_key, time_interval=time_interval)
                # Persist the reading.
                sql_cmd = "insert into digitalsmart.airstate(pid, aqi, pm2, pm10, co, no2, o3, so2, flag, lasttime) " \
                          "value (%d,%d,%d,%d,%f,%d,%d,%d,%d,'%s')" % (pid, aqi, pm2, pm10, co, no2, o3, so2, 1, now)

                mysql_pool.sumbit(sql_cmd)
                semaphore.acquire()
                nonlocal count
                count -= 1  # one task done
                semaphore.release()
                # NOTE(review): this read of count is outside the semaphore;
                # benign only if just the final task can observe 0.
                if count == 0:  # all tasks done: the update time may be written
                    queue.put(1)
                return

            thread_pool.submit(fast, citypid, city_weather_pid)
        thread_pool.run()
        thread_pool.close()
        # Demote the previous "latest" rows once every task has finished.
        if lasttime:
            sql = "update digitalsmart.airstate set flag=0 where lasttime='{0}'".format(lasttime)
            queue.get()
            mysql_pool.sumbit(sql)
        mysql_pool.close()
        print("天气数据写入成功")
Beispiel #14
0
    def manager_scence_situation(self):
        """Scenic-area visitor-flow management -- runs every half hour.

        For each Baidu-sourced scenic area: fetch the latest visitor
        counts, cache them in Redis, and append the not-yet-stored samples
        to the area's history table.
        """
        # Data-source flag -- Baidu is 1, Tencent is 0.
        type_flag = 1
        # MySQL connection pool.
        mysql_pool = ConnectPool(max_workers=6,
                                 host=host,
                                 user=user,
                                 password=password,
                                 port=port,
                                 database=database)
        # Areas needing realtime visitor flow, joined with the id of the
        # history table each one writes to.
        sql = "select ds.pid,dt.table_id from digitalsmart.scencemanager as ds inner join digitalsmart.tablemanager " \
              "as dt on ds.type_flag=dt.flag   and ds.pid=dt.pid where ds.type_flag=1"
        iterator_pids = mysql_pool.select(sql)
        thread_pool = ThreadPool(max_workers=6)
        ddate: int = int(
            str(datetime.datetime.today().date()).replace("-", ''))

        # Redis cache lifetime.
        time_interval = datetime.timedelta(minutes=33)
        scence = ScenceFlow()
        for pid, table_id in iterator_pids:

            def fast(area_id, table_index):
                """
                Fetch one area's visitor data and write MySQL + Redis.
                :param area_id: scenic-area id
                :param table_index: index of the area's history table
                :return:
                """
                # BUG FIX: materialize the records first.  The original kept
                # the raw iterable; filter_peopleflow consumed it, so the
                # caching loop below saw an exhausted iterator whenever a
                # generator was returned (the sibling daily-traffic code
                # already list()s for the same reason).
                positioning_instances = list(
                    scence.get_scence_situation(area_id))
                # Records not yet present in the database.
                filter_positioning_instances = scence.filter_peopleflow(
                    mysql_pool, positioning_instances, ddate, area_id,
                    table_index)

                # Cache {detail time -> visitor count}.
                cache_data_mapping = dict()
                for positioning in positioning_instances:
                    cache_data_mapping[
                        positioning.detailTime] = positioning.num
                redis_key = "scence:{0}:{1}".format(area_id, type_flag)
                self._redis_worker.hashset(name=redis_key,
                                           mapping=cache_data_mapping)
                self._redis_worker.expire(name=redis_key,
                                          time_interval=time_interval)

                # Choose the history table, then insert the new samples.
                sql_format = "insert into digitalsmart.historyscenceflow{table_index} (pid, ddate, ttime, num) " \
                             "values ('%d','%d','%s','%d')".format(table_index=table_index)
                for positioning in filter_positioning_instances:
                    sql_cmd = sql_format % (
                        positioning.region_id, positioning.date,
                        positioning.detailTime, positioning.num)
                    mysql_pool.sumbit(sql_cmd)

            thread_pool.submit(fast, pid, table_id)
        thread_pool.run()
        thread_pool.close()
        mysql_pool.close()

        print("百度资源景区数据挖掘完毕")
Beispiel #15
0
    def manager_scenece_people(self):
        """
        Update the crowd-distribution data for a point in time.
        :return:
        """
        self.mysql_pool = ConnectPool(max_workers=8,
                                      host=host,
                                      user=user,
                                      password=password,
                                      port=port,
                                      database=database)  # MySQL connection pool

        up_date = int(datetime.datetime.now().timestamp())  # current timestamp

        sql: str = "select pid,latitude,longitude from digitalsmart.scencemanager where type_flag=0"  # areas to query

        data = self.mysql_pool.select(sql)
        # NOTE(review): hard-coded date, yet the comment says "today" --
        # looks like debug leftover; presumably this should be
        # datetime.datetime.today().  Confirm before changing.
        date_today = datetime.datetime(2019, 10, 8, 1, 1, 0)  # today's date
        ddate = str(date_today.date())
        tmp_date = date_today.timestamp()  # today's timestamp
        if date_today.time().minute % 5 > 0:  # minutes must be a multiple of 5
            now_time = date_today.time()  # current time of day
            minute = now_time.minute - now_time.minute % 5  # round minutes down
            detailtime = "{0:0>2}:{1:0>2}:00".format(now_time.hour, minute)
        else:
            detailtime = time.strftime("%H:%M:00", time.localtime(tmp_date))
        thread_pool = ThreadPool(max_workers=6)  # worker pool
        pos = PositioningSituation()
        for info in data:  # refresh each area

            def fast(item):
                """
                Fetch one area's crowd data and write database + memory.
                :param item: row of basic area info (pid, lat, lon)
                :return:
                """
                region_id = item[0]  # area id
                float_lat = item[1]  # area latitude
                float_lon = item[2]  # area longitude
                sql_cmd = "select table_id from digitalsmart.tablemanager where pid={0}".format(
                    region_id)  # which table the data belongs in

                table_id = self.mysql_pool.select(sql_cmd)[0][0]  # target table id
                last_positioning_data = pos.get_scenece_people_json_data(
                    date=ddate, dateTime=detailtime,
                    region_id=region_id)  # latest crowd-distribution payload

                if not last_positioning_data:  # request failed
                    return
                self.manager_scenece_people_distribution(
                    last_positioning_data, region_id, up_date, float_lat,
                    float_lon, table_id)  # update distribution data
                self.manager_scenece_people_situation(table_id,
                                                      last_positioning_data,
                                                      region_id, ddate,
                                                      detailtime)

            thread_pool.submit(fast, info)  # enqueue task

        thread_pool.run()  # run the task queue
        thread_pool.close()  # shut the pool down
        self.mysql_pool.close()
        print("景区人流数据挖掘完毕")
Beispiel #16
0
class ManagerScence(PositioningPeople):
    """
    缓存数据格式
        景区人数: key= "scence:{0}:{1}".format(pid,type_flag),value={"HH:MM:SS":num,.....}
        人流趋势:key = "trend:{pid}".format(pid=region_id) ,value={'00:00:00':rate}
        人流分布: key = "distribution:{0}".format(pide)  value=str([{"lat": centerlat + item.latitude, "lng": centerlon + item.longitude,
                               "count": item.number},......)



    """
    def __init__(self):
        # Redis connection pool used by all caching methods in this class.
        self._redis_worker = RedisConnectPool(max_workers=7)

    def __del__(self):
        # Drop the pool reference on finalization.  NOTE(review): __del__
        # timing is not guaranteed; an explicit close() would be safer.
        del self._redis_worker

    def manager_scence_situation(self):
        """Scenic-area visitor-flow management -- runs every half hour.

        For each Baidu-sourced scenic area: fetch the latest visitor
        counts, cache them in Redis, and append the not-yet-stored samples
        to the area's history table.
        """
        # Data-source flag -- Baidu is 1, Tencent is 0.
        type_flag = 1
        # MySQL connection pool.
        mysql_pool = ConnectPool(max_workers=6,
                                 host=host,
                                 user=user,
                                 password=password,
                                 port=port,
                                 database=database)
        # Areas needing realtime visitor flow, joined with the id of the
        # history table each one writes to.
        sql = "select ds.pid,dt.table_id from digitalsmart.scencemanager as ds inner join digitalsmart.tablemanager " \
              "as dt on ds.type_flag=dt.flag   and ds.pid=dt.pid where ds.type_flag=1"
        iterator_pids = mysql_pool.select(sql)
        thread_pool = ThreadPool(max_workers=6)
        ddate: int = int(
            str(datetime.datetime.today().date()).replace("-", ''))

        # Redis cache lifetime.
        time_interval = datetime.timedelta(minutes=33)
        scence = ScenceFlow()
        for pid, table_id in iterator_pids:

            def fast(area_id, table_index):
                """
                Fetch one area's visitor data and write MySQL + Redis.
                :param area_id: scenic-area id
                :param table_index: index of the area's history table
                :return:
                """
                # BUG FIX: materialize the records first.  The original kept
                # the raw iterable; filter_peopleflow consumed it, so the
                # caching loop below saw an exhausted iterator whenever a
                # generator was returned (the sibling daily-traffic code
                # already list()s for the same reason).
                positioning_instances = list(
                    scence.get_scence_situation(area_id))
                # Records not yet present in the database.
                filter_positioning_instances = scence.filter_peopleflow(
                    mysql_pool, positioning_instances, ddate, area_id,
                    table_index)

                # Cache {detail time -> visitor count}.
                cache_data_mapping = dict()
                for positioning in positioning_instances:
                    cache_data_mapping[
                        positioning.detailTime] = positioning.num
                redis_key = "scence:{0}:{1}".format(area_id, type_flag)
                self._redis_worker.hashset(name=redis_key,
                                           mapping=cache_data_mapping)
                self._redis_worker.expire(name=redis_key,
                                          time_interval=time_interval)

                # Choose the history table, then insert the new samples.
                sql_format = "insert into digitalsmart.historyscenceflow{table_index} (pid, ddate, ttime, num) " \
                             "values ('%d','%d','%s','%d')".format(table_index=table_index)
                for positioning in filter_positioning_instances:
                    sql_cmd = sql_format % (
                        positioning.region_id, positioning.date,
                        positioning.detailTime, positioning.num)
                    mysql_pool.sumbit(sql_cmd)

            thread_pool.submit(fast, pid, table_id)
        thread_pool.run()
        thread_pool.close()
        mysql_pool.close()

        print("百度资源景区数据挖掘完毕")

    def manager_scence_trend(self):
        """
        Area visitor-trend management -- runs every 5 minutes.

        Caches each area's full trend curve (actual + predicted) in Redis
        and inserts only the actual, not-yet-stored points into MySQL.
        :return:
        """

        date_today = datetime.datetime.today()  # today
        time_inv = datetime.timedelta(days=1)  # interval
        date_tomorrow = date_today + time_inv  # tomorrow
        str_start = str(date_today.date())  # start date (today)
        str_end = str(date_tomorrow.date())  # end date
        now_time = datetime.datetime.now()  # current time
        hour = now_time.hour
        minuties = now_time.minute
        minuties = minuties - minuties % 5  # round minutes down to a multiple of 5
        dividing_time_line = str(
            datetime.datetime(2019, 1, 1, hour, minuties,
                              0).time())  # boundary between actual and predicted data
        # MySQL connection pool.
        mysql_pool = ConnectPool(max_workers=6,
                                 host=host,
                                 user=user,
                                 password=password,
                                 port=port,
                                 database=database)
        sql = "select pid,area from digitalsmart.scencemanager where type_flag=0 "  # areas needing trend data
        scence_info_data: Tuple = mysql_pool.select(sql)
        thread_pool = ThreadPool(max_workers=6)
        time_interval = datetime.timedelta(minutes=33)  # cache lifetime
        pos_trend = PositioningTrend()
        predict = True  # whether to fetch predicted data
        for item in scence_info_data:
            pid = item[0]  # area id
            area = item[1]  # area name

            def fast(region_id, place):
                """
                Fetch one area's visitor trend and write MySQL + Redis.
                :param region_id: area id
                :param place: area name
                :return:
                """

                sql_cmd = "select ttime from digitalsmart.scencetrend where pid={0} and ddate='{1}' " \
                          "order by ttime".format(region_id, int(str_start.replace("-", "")))  # latest stored time
                last_ttime: str = "-1:00:00"  # -1 o'clock: no data stored today yet

                try:
                    table_last_time = mysql_pool.select(sql_cmd)[-1][0]
                    last_ttime = str(table_last_time)
                    # BUG FIX: MySQL returns "9:05:00"-style times; left-pad
                    # to "09:05:00" so the string comparison below works
                    # (sibling methods already normalize this way).
                    if len(last_ttime) == 7:
                        last_ttime = "0" + last_ttime
                except IndexError:
                    pass
                trend_instances = pos_trend.get_place_index(
                    name=place,
                    placeid=region_id,
                    date_start=str_start,
                    date_end=str_end,
                    predict=predict)  # trend data
                cache_data_mapping = dict()  # Redis payload

                for trend in trend_instances:
                    # Cache every point; only actual, not-yet-stored points
                    # go to MySQL.
                    ttime = trend.detailtime  # time of day
                    rate = trend.index  # index at that time
                    cache_data_mapping[ttime] = rate
                    if ttime <= last_ttime or ttime > dividing_time_line:
                        continue
                    # BUG FIX: use a local name here -- the original rebound
                    # region_id, so the Redis key built after the loop could
                    # pick up the last record's id instead of the area's.
                    trend_pid = trend.region_id
                    ddate = trend.ddate  # current date
                    sql_cmd = "insert into digitalsmart.scencetrend(pid, ddate, ttime, rate) VALUE(%d,%d,'%s',%f)" % (
                        trend_pid, ddate, ttime, rate)
                    mysql_pool.sumbit(sql_cmd)
                if not cache_data_mapping:
                    return
                redis_key = "trend:{pid}".format(pid=region_id)
                self._redis_worker.hashset(name=redis_key,
                                           mapping=cache_data_mapping)
                self._redis_worker.expire(name=redis_key,
                                          time_interval=time_interval)

            thread_pool.submit(fast, pid, area)

        thread_pool.run()
        thread_pool.close()
        mysql_pool.close()

        print("景区趋势挖掘完毕")

    def manager_scenece_people(self):
        """
        Update the crowd-distribution data for a point in time.
        :return:
        """
        self.mysql_pool = ConnectPool(max_workers=8,
                                      host=host,
                                      user=user,
                                      password=password,
                                      port=port,
                                      database=database)  # MySQL connection pool

        up_date = int(datetime.datetime.now().timestamp())  # current timestamp

        sql: str = "select pid,latitude,longitude from digitalsmart.scencemanager where type_flag=0"  # areas to query

        data = self.mysql_pool.select(sql)
        # NOTE(review): hard-coded date, yet the comment says "today" --
        # looks like debug leftover; presumably this should be
        # datetime.datetime.today().  Confirm before changing.
        date_today = datetime.datetime(2019, 10, 8, 1, 1, 0)  # today's date
        ddate = str(date_today.date())
        tmp_date = date_today.timestamp()  # today's timestamp
        if date_today.time().minute % 5 > 0:  # minutes must be a multiple of 5
            now_time = date_today.time()  # current time of day
            minute = now_time.minute - now_time.minute % 5  # round minutes down
            detailtime = "{0:0>2}:{1:0>2}:00".format(now_time.hour, minute)
        else:
            detailtime = time.strftime("%H:%M:00", time.localtime(tmp_date))
        thread_pool = ThreadPool(max_workers=6)  # worker pool
        pos = PositioningSituation()
        for info in data:  # refresh each area

            def fast(item):
                """
                Fetch one area's crowd data and write database + memory.
                :param item: row of basic area info (pid, lat, lon)
                :return:
                """
                region_id = item[0]  # area id
                float_lat = item[1]  # area latitude
                float_lon = item[2]  # area longitude
                sql_cmd = "select table_id from digitalsmart.tablemanager where pid={0}".format(
                    region_id)  # which table the data belongs in

                table_id = self.mysql_pool.select(sql_cmd)[0][0]  # target table id
                last_positioning_data = pos.get_scenece_people_json_data(
                    date=ddate, dateTime=detailtime,
                    region_id=region_id)  # latest crowd-distribution payload

                if not last_positioning_data:  # request failed
                    return
                self.manager_scenece_people_distribution(
                    last_positioning_data, region_id, up_date, float_lat,
                    float_lon, table_id)  # update distribution data
                self.manager_scenece_people_situation(table_id,
                                                      last_positioning_data,
                                                      region_id, ddate,
                                                      detailtime)

            thread_pool.submit(fast, info)  # enqueue task

        thread_pool.run()  # run the task queue
        thread_pool.close()  # shut the pool down
        self.mysql_pool.close()
        print("景区人流数据挖掘完毕")

    def manager_scenece_people_distribution(self, scence_people_data: dict,
                                            region_id, tmp_date: int,
                                            centerlat: float, centerlon: float,
                                            table_id: int):
        """
        Update the crowd-distribution data of a region -- a few thousand rows
        are inserted per call.
        :param scence_people_data: crowd data packet
        :param scence_people_data: crowd data packet (dict)
        :param region_id: scenic-area id
        :param tmp_date: timestamp
        :param centerlat: center latitude
        :param centerlon: center longitude
        :param table_id: table index
        :return:
        """
        # Packets like {',': 0} are garbage; anything with at most one key
        # carries no usable distribution, so bail out early.
        if len(scence_people_data.keys()) <= 1:
            return
        locator = PositioningSituation()
        point_records = list(
            locator.get_scence_distribution_situation(
                scence_people_data))  # lat/lon/headcount records, materialized because iterated twice

        select_table: str = "insert into digitalsmart.peopleposition{table_id} (pid, tmp_date, lat, lon, num) VALUES" \
            .format(table_id=table_id)  # choose the target table
        mysql_rows = self._generator_of_mysql_insert_data(
            point_records, region_id, tmp_date, centerlat,
            centerlon)  # generator of rows for the DB insert
        cache_rows = self._generator_of_redis_insert_data(
            point_records, centerlat, centerlon)  # generator of entries to cache
        time_interval = datetime.timedelta(minutes=60)  # cache TTL
        redis_key = "distribution:{0}".format(region_id)  # cache key

        # cache the data first
        try:
            value = json.dumps(list(cache_rows))
        except Exception as e:
            print("列表序列化失败", e, region_id)
            return
        self._redis_worker.set(name=redis_key, value=value)  # cache
        self._redis_worker.expire(name=redis_key, time_interval=time_interval)
        # Committing one row at a time spends most of the wall time on log IO,
        # so rows are concatenated and submitted in batches -- one log write
        # per batch. Batches must stay below the MySQL pipe limit
        # (Max_allowed_packet must be raised server-side), or the insert fails.
        pending = list()  # rows waiting to be flushed
        for row_no, row in enumerate(mysql_rows, start=1):
            pending.append(row)
            if row_no % 3000 == 0:  # flush a full batch
                self.mysql_pool.sumbit(select_table + ','.join(pending))
                pending.clear()
        if pending:  # flush the remainder
            self.mysql_pool.sumbit(select_table + ','.join(pending))
        sql = "update digitalsmart.tablemanager  " \
              "set last_date={0} where pid={1};".format(tmp_date, region_id)  # refresh the distribution table's last-modified time

        self.mysql_pool.sumbit(sql)

    def manager_scenece_people_situation(self, table_id: int,
                                         scence_people_data: dict, pid: int,
                                         ddate: str, ttime: str):
        """
        Update the region's headcount record -- exactly one row is inserted
        per call, then the whole day's series is re-cached in Redis.
        :param table_id: table index
        :param scence_people_data: crowd data packet
        :param pid: scenic-area id
        :param ddate: date, format yyyy-mm-dd
        :param ttime: time, format HH:MM:SS
        :return:
        """

        type_flag = 0  # data-source category -- 1 for Baidu, 0 for Tencent
        pos = PositioningSituation()
        positioning_instance = pos.get_scence_people_count(
            scence_people_data, ddate, ttime, pid)
        sql_format = "insert into digitalsmart.historyscenceflow{table_id}(pid, ddate, ttime, num) values (%d,%d,'%s',%d) ".format(
            table_id=table_id)

        sql = sql_format % (
            positioning_instance.region_id, positioning_instance.date,
            positioning_instance.detailTime, positioning_instance.num)
        self.mysql_pool.sumbit(sql)  # insert into MySQL
        format_int_of_ddate = int(ddate.replace("-", ''))
        sql = "select ttime,num from digitalsmart.historyscenceflow{table_id} where ddate={ddate};".format(
            table_id=table_id, ddate=format_int_of_ddate)
        people_flow_data = self.mysql_pool.select(sql)  # rows to cache
        cache_data_mapping = dict()
        for people_flow in people_flow_data:
            ttime = str(people_flow[0])  # time of day as text
            # FIX: raw string -- "\d" in a plain string is an invalid escape
            # sequence (DeprecationWarning, a SyntaxError in future Pythons).
            match_ttime_format = re.match(r"(\d+):(\d+):(\d+)", ttime)
            if match_ttime_format is None:
                continue  # FIX: skip malformed times instead of crashing on .group()
            hour = match_ttime_format.group(1)
            minutiue = match_ttime_format.group(2)
            second = match_ttime_format.group(3)
            # zero-pad a single-digit hour (MySQL TIME stringifies as H:MM:SS)
            ttime = "{0}:{1}:{2}".format(hour.zfill(2), minutiue, second)
            num = people_flow[1]  # visitor count

            cache_data_mapping[ttime] = num
        # nothing to cache -> do not touch Redis
        if not cache_data_mapping:
            return
        time_interval = datetime.timedelta(minutes=60)  # cache TTL
        # cache the day's series as a hash
        redis_key = "scence:{0}:{1}".format(pid, type_flag)
        self._redis_worker.hashset(name=redis_key, mapping=cache_data_mapping)
        self._redis_worker.expire(name=redis_key, time_interval=time_interval)

    @staticmethod
    def _generator_of_mysql_insert_data(source_data: list, region_id: int,
                                        tmp_date: int, centerlat: float,
                                        centerlon: float) -> Iterator[str]:
        """
        Yield stringified value tuples for the crowd-distribution DB insert.
        :param source_data: record objects carrying latitude/longitude offsets and a headcount
        :param region_id: scenic-area id
        :param tmp_date: timestamp
        :param centerlat: center latitude
        :param centerlon: center longitude
        :return: generator of "(pid, ts, lat, lon, num)" strings
        """
        for record in source_data:
            # offsets in the record are relative to the area's center point
            row = (region_id, tmp_date, centerlat + record.latitude,
                   centerlon + record.longitude, record.number)
            yield str(row)

    @staticmethod
    def _generator_of_redis_insert_data(source_data: list, centerlat: float,
                                        centerlon: float):
        """
        Yield the dict entries to be cached in Redis for the heat map.
        :param source_data: record objects carrying latitude/longitude offsets and a headcount
        :param centerlat: center latitude
        :param centerlon: center longitude
        :return: generator of {"lat", "lng", "count"} dicts
        """
        for record in source_data:
            # offsets in the record are relative to the area's center point
            entry = {
                "lat": centerlat + record.latitude,
                "lng": centerlon + record.longitude,
                "count": record.number
            }
            yield entry
# ---- Beispiel #17 (scraper example separator; kept as a comment so the file stays parseable) ----
    def manager_scence_trend(self):
        """
        Manage regional crowd-trend data -- runs once every 5 minutes.

        For every scenic area with type_flag=0, fetches today's crowd-index
        trend (actual plus predicted values), inserts only the not-yet-recorded
        actual points into MySQL, and caches the whole series in Redis.
        :return:
        """

        date_today = datetime.datetime.today()  # today
        time_inv = datetime.timedelta(days=1)  # one-day interval
        date_tomorrow = date_today + time_inv  # tomorrow
        str_start = str(date_today.date())  # today's date
        str_end = str(date_tomorrow.date())  # end date
        now_time = datetime.datetime.now()  # current time
        hour = now_time.hour
        minuties = now_time.minute
        minuties = minuties - minuties % 5  # round the minute down to a multiple of 5
        dividing_time_line = str(
            datetime.datetime(2019, 1, 1, hour, minuties,
                              0).time())  # "HH:MM:SS" line dividing actual from predicted data (only .time() is used; the 2019-01-01 date is a throwaway)
        # MySQL connection pool
        mysql_pool = ConnectPool(max_workers=6,
                                 host=host,
                                 user=user,
                                 password=password,
                                 port=port,
                                 database=database)
        sql = "select pid,area from digitalsmart.scencemanager where type_flag=0 "  # scenic areas whose trend must be fetched
        scence_info_data: Tuple = mysql_pool.select(sql)  # submit the query
        thread_pool = ThreadPool(max_workers=6)  # thread pool
        time_interval = datetime.timedelta(minutes=33)  # cache TTL
        pos_trend = PositioningTrend()
        predict = True  # whether to fetch predicted data too
        for item in scence_info_data:  # update each area
            pid = item[0]  # unique scenic-area id
            area = item[1]  # scenic-area name

            def fast(region_id, place):
                """
                Fetch one scenic area's crowd trend, then write it to the
                database and the cache.
                :param region_id: scenic-area id
                :param place: scenic-area name
                :return:
                """

                sql_cmd = "select ttime from digitalsmart.scencetrend where pid={0} and ddate='{1}' " \
                          "order by ttime".format(region_id, int(str_start.replace("-", "")))  # most recent recorded time for this area
                last_ttime: str = "-1:00:00"  # default "-1 o'clock": no data for today yet

                try:
                    # fetch the latest recorded time
                    table_last_time = mysql_pool.select(sql_cmd)[-1][0]
                    last_ttime = str(table_last_time)
                except IndexError:
                    pass
                trend_instances = pos_trend.get_place_index(
                    name=place,
                    placeid=region_id,
                    date_start=str_start,
                    date_end=str_end,
                    predict=predict)  # fetch the trend data
                cache_data_mapping = dict()  # data to cache

                # Insert and cache: only actual points (up to the dividing
                # line) go to MySQL; actual and predicted both go to the cache.
                # NOTE(review): ttime comparisons below are lexicographic
                # string comparisons and assume zero-padded HH:MM:SS -- confirm
                # the format produced upstream.
                for trend in trend_instances:
                    ttime = trend.detailtime  # this time point
                    rate = trend.index  # index at this time point
                    cache_data_mapping[ttime] = rate
                    if ttime <= last_ttime or ttime > dividing_time_line:  # skip already-recorded or predicted points
                        continue
                    region_id = trend.region_id  # scenic-area id (NOTE(review): rebinds the closure parameter)
                    ddate = trend.ddate  # current date
                    sql_cmd = "insert into digitalsmart.scencetrend(pid, ddate, ttime, rate) VALUE(%d,%d,'%s',%f)" % (
                        region_id, ddate, ttime, rate)
                    mysql_pool.sumbit(sql_cmd)  # write to the database
                # nothing collected -> no cache update
                if not cache_data_mapping:
                    return
                redis_key = "trend:{pid}".format(pid=region_id)
                # cache the data
                self._redis_worker.hashset(name=redis_key,
                                           mapping=cache_data_mapping)
                self._redis_worker.expire(name=redis_key,
                                          time_interval=time_interval)

            thread_pool.submit(fast, pid, area)  # enqueue the task

        thread_pool.run()  # run the queued tasks
        thread_pool.close()  # close the thread pool
        mysql_pool.close()  # close the MySQL connection pool

        print("景区趋势挖掘完毕")
class CompleteDataInterface:
    """
    Checks and repairs the completeness of cached data.

    When accessing Redis resources through the connection pool, the pool size
    must be at least the concurrency level (and both must stay below Redis's
    maximum connection count); otherwise the extra concurrent requests cannot
    obtain a Redis resource and receive an error.
    """

    def __init__(self):
        # NOTE(review): attribute names keep the original "worke" spelling;
        # renaming would break external users of these attributes.
        self.redis_worke = RedisConnectPool(max_workers=10)
        self.mysql_worke = ConnectPool(max_workers=1, host=host, user=user, password=password, port=port,
                                       database=database)

    def __del__(self):
        # Best-effort release of both pools when the object is collected.
        self.mysql_worke.close()
        del self.redis_worke
        del self.mysql_worke

    @staticmethod
    def check_keys_complete(source_keys: list, target_keys: list) -> List:
        """
        Check the completeness of the cached keys.
        :param source_keys: the complete set of keys
        :param target_keys: the keys to be checked
        :return: target_keys if they match source_keys exactly, otherwise source_keys
        """
        if len(source_keys) == len(target_keys):
            # verify that every target key appears exactly once in the full set
            for key in target_keys:
                result = source_keys.count(key)
                if result != 1:  # found a bad key
                    return source_keys

            return target_keys
        else:

            return source_keys

    @staticmethod
    def check_data_complete(source_data: list, time_interval: datetime.timedelta) -> bool:
        """
        Check the completeness of the data.
        :param source_data: the time series of the Redis cache being checked,
            items are byte strings in HH:MM:SS format
        :param time_interval: the interval between points of the series
        :return: True when the data is complete
        """
        if len(source_data) == 0:
            return False
        now_time = datetime.datetime.now()
        temp_collection = list()
        for item in source_data:
            temp_collection.append(item.decode())  # Redis returns bytes; decode to str
        source_data = temp_collection
        max_time = max(source_data)  # latest time in the series
        last_time = time.strptime(max_time, "%H:%M:%S")
        redis_last_time = datetime.datetime(now_time.year, now_time.month, now_time.day, last_time.tm_hour,
                                            last_time.tm_min,
                                            last_time.tm_sec)
        init_time = datetime.datetime(now_time.year, now_time.month, now_time.day, 0, 0, 0)  # series start (midnight today)
        # number of points a complete series from midnight to the latest time would hold
        standard_length = int(
            (redis_last_time.timestamp() - init_time.timestamp()) / time_interval.seconds) + 1
        if len(source_data) < standard_length:
            return False
        else:
            return True

    def get_mysql_complete_keys(self, sql, regular) -> Iterator:
        """
        Combine MySQL rows into the complete set of cache keys.
        :param sql: the MySQL query
        :param regular: the key template (a %-format string)
        :return: generator of keys
        """
        iter_pids = self.mysql_worke.select(sql)
        for pid in iter_pids:
            yield regular % pid

    def get_complete_keys(self, sql: str, complete_keys_regular: str, search_regular: str) -> List:
        """
        Compare the keys built from MySQL with the keys cached in Redis and
        return the most complete set.
        :param sql: the query statement
        :param complete_keys_regular: the template for keys built from MySQL
        :param search_regular: the scan pattern for keys cached in Redis
        :return:
        """

        # the full key set derived from MySQL
        source_complete_keys = self.get_mysql_complete_keys(sql, complete_keys_regular)

        # scan the Redis keys
        keys = self.redis_worke.get_keys(search_regular)
        target_keys = list()
        for key in keys:  # decode bytes to str
            key = key.decode()
            target_keys.append(key)
        # check completeness and keep the most complete set
        comple_keys = self.check_keys_complete(list(source_complete_keys), target_keys)
        return comple_keys

    def time_difference(self, now_time: datetime.datetime, complete_keys: list) -> float:
        """
        Return the longest lag between now and the newest cached point.
        :param now_time: the current time
        :param complete_keys: the keys cached in Redis
        :return: the time difference in seconds; 1000000 is a sentinel meaning
            "empty data found, a full check is required"
        """
        time_list = list()  # newest time found under each cached key
        for key in complete_keys:
            redis_data = self.redis_worke.hash_get_all(key)
            #  any empty hash means a check must be triggered
            if not redis_data:
                return 1000000
            max_time = max(redis_data.keys()).decode()
            time_list.append(max_time)

        min_time = min(time_list)  # the most-lagging key's newest time
        last_time = time.strptime(min_time, "%H:%M:%S")
        redis_last_time = datetime.datetime(now_time.year, now_time.month, now_time.day, last_time.tm_hour,
                                            last_time.tm_min,
                                            last_time.tm_sec)
        time_inv = now_time.timestamp() - redis_last_time.timestamp()  # the lag in seconds
        return time_inv