def new_aux_objects(is_config=True,
                    is_dao=True,
                    is_channel=True,
                    is_model=True):
    """Build the auxiliary objects selected by the flags.

    Every flag enables one element of the returned tuple; a disabled
    element is returned as ``None``.

    :param is_config: build the QualityControlConfig object
    :param is_dao: build the DataOperationsByMysql data-access object
    :param is_channel: build the VargroupChannels object (queried through
        the data-access object)
    :param is_model: query the consistency models (through the data-access
        object)
    :return: tuple ``(my_config, dao, vg_c, models)``
    :raises ValueError: if ``is_channel`` or ``is_model`` is requested while
        ``is_dao`` is disabled; both are read through the data-access object
    """
    # Channel and model data are queried through the DAO, so asking for them
    # without a DAO can never work; fail with an explicit message instead of
    # an AttributeError on None further down.
    if (is_channel or is_model) and not is_dao:
        raise ValueError(
            'is_channel and is_model require is_dao to be enabled')

    # Load the configuration object.
    my_config = QualityControlConfig() if is_config else None

    # Create the data-access layer object.
    dao = DataOperationsByMysql() if is_dao else None

    # Fetch the vargroup related data.
    vg_c = None
    if is_channel:
        channel_df = dao.query_channels()
        aq_dict = dao.query_aq_type_in_dict()
        vg_c = VargroupChannels(channel_df, aq_dict)

    # Fetch the model related data.
    models = dao.query_consistency_model() if is_model else None

    return my_config, dao, vg_c, models
def test_get_qc_dev():
    """Print the result of get_qc_dev_by_version_and_var for version '1' and 'TSP'."""
    data_access = DataOperationsByMysql()
    qc_versions = VargroupQCVersions(data_access)
    # The active-device frame is passed to the lookup as its third argument.
    active_devices = data_access.query_active_devices()
    qc_devices = qc_versions.get_qc_dev_by_version_and_var(
        '1', 'TSP', active_devices)
    print(qc_devices)
 def __init__(self, hour_minute):
     """
     Initialise the interfaces and parameters used by this object.

     :param hour_minute: forwarded to DataOperationsByMysql together with
         the configuration object
     """
     cfg = QualityControlConfig()
     self.config = cfg
     self.dao = DataOperationsByMysql(cfg, hour_minute)
     self.variables = cfg.get_config_global_data('full_pollutants')
     self.qc_routine = QualityControlRoutine(self.dao)
     # Read from the 'save_path_for_by_minute_qc' setting; change it to
     # match the actual deployment.
     self.dir = cfg.get_config_global_data('save_path_for_by_minute_qc')
Esempio n. 4
0
def test_vargroup_channels():
    """Build a VargroupChannels from database data and print its lookups."""
    data_access = DataOperationsByMysql()
    aq_types = data_access.query_aq_type_in_dict()
    channels = data_access.query_channels()

    channel_index = VargroupChannels(channels, aq_types)

    # Print each lookup attribute followed by a blank separator.
    for attr_name in ('channel_by_vargroup_and_var', 'channel_by_vargroup'):
        print(getattr(channel_index, attr_name))
        print('\n')
    print(channel_index.get_var_names_by_vargroup('YSRDAQ07HW'))
def test_site_inter():
    """Run SiteInterpolation for city 771 at a fixed hour and dump the result.

    The returned frame is written to ``test_all.csv`` and its head is printed.
    """
    target_hour = '2018-12-01 00:00:00'
    cities = [771]
    data_access = DataOperationsByMysql(QualityControlConfig(), target_hour)
    active_devices = data_access.query_active_devices_by_city(cities)
    device_ids = active_devices['DEV_ID'].unique().tolist()
    neighbors = NeighborDevices(data_access, device_ids)
    measure_point_by_dev = df_dict(active_devices)
    interpolator = SiteInterpolation(data_access, target_hour, neighbors,
                                     active_devices, measure_point_by_dev)
    # is_for_var is mandatory; it restricts the interpolation to the listed
    # pollutants.
    result = interpolator.execute_site_interpolate(
        cities, target_hour, is_for_var=['PM25', 'PM10'])
    result.to_csv('test_all.csv')
    print(result.head())
Esempio n. 6
0
def new_aux_objects(hour):
    """Create the auxiliary objects needed for the given hour.

    :param hour: forwarded to DataOperationsByMysql together with the config
    :return: tuple ``(config, dao, vargroup channels, consistency models)``
    """
    # Configuration object first: the data-access layer is built from it.
    config = QualityControlConfig()
    data_access = DataOperationsByMysql(config, hour)

    # Vargroup related data.
    channels = data_access.query_channels()
    aq_types = data_access.query_aq_type_in_dict()
    vargroup_channels = VargroupChannels(channels, aq_types)

    # Model related data.
    consistency_models = data_access.query_consistency_model()

    return config, data_access, vargroup_channels, consistency_models
def test_prepare_data():
    """Construct a QualityControlRoutine for city 206 and call prepare_data."""
    print('Enter prepare data')

    data_access = DataOperationsByMysql()
    settings = QualityControlConfig()
    routine = QualityControlRoutine(
        data_access, [206], settings, None, '2018-10-25 10:00:00')
    routine.prepare_data()
def test_agg_capture():
    """Load capture data for city 1 and run AggregateCapture.capture_to_org."""
    # Configuration object and data-access layer.
    settings = QualityControlConfig()
    data_access = DataOperationsByMysql()

    # Device list with its associated information.
    devices = data_access.query_active_devices_by_city([1])
    print(devices.head())
    print('\n')

    # Capture dataframes for the devices in the list.
    captures = data_access.query_capture_data_by_hour(
        '2018-09-10 00:00:00', devices)
    print(captures.keys())

    # Vargroup related data.
    vargroup_channels = VargroupChannels(
        data_access.query_channels(),
        data_access.query_aq_type_in_dict())

    # Model related data is queried right before the aggregator is built.
    aggregator = AggregateCapture(
        settings, captures, vargroup_channels,
        data_access.query_consistency_model())
    aggregator.capture_to_org()
Esempio n. 9
0
 def __init__(self, hour):
     """
     Initialise the interfaces and parameters used by this object.

     :param hour: forwarded to DataOperationsByMysql together with the
         configuration object
     """
     cfg = QualityControlConfig()
     self.config = cfg
     self.dao = DataOperationsByMysql(cfg, hour)
     self.variables = cfg.get_config_global_data('full_pollutants')
     self.qc_routine = QualityControlRoutine(self.dao)
     self.adjust_df, self.interpolate_df = pd.DataFrame(), pd.DataFrame()
     # VAR_TYPE_ID -> variable name.
     type_ids = (1, 2, 3, 4, 5, 6, 10, 8)
     var_names = ('PM25', 'PM10', 'SO2', 'CO', 'NO2', 'O3', 'TSP', 'TVOC')
     self.var_type_id_to_var_dict = dict(zip(type_ids, var_names))
Esempio n. 10
0
def main():
    """Print the result of find_nearest_site_by_num for one device in city 1."""
    # NOTE(review): `common` is not referenced below; presumably imported for
    # its side effects - confirm before removing.
    import common
    from dao.mysql_impl import DataOperationsByMysql
    from config.qc_config import QualityControlConfig
    target_hour = '2018-11-14 01:00:00'
    data_access = DataOperationsByMysql(QualityControlConfig(), target_hour)
    neighbors = NeighborDevices(data_access, city_id=[1])
    # Other lookups that can be tried here:
    #   neighbors.find_nearest_site('YSRDPM250000004796', 4)
    #   neighbors.find_dev_by_distance('PM25', 'YSRDPM10P500000050', 1)
    print(neighbors.find_nearest_site_by_num('YSRDPM10P500000050', 5))
 def sql_data_devices(self,flag):
     """
     Build two lookup dictionaries from the device coordinates read from the
     database.

     The first dictionary maps SENSOR_ID to the row position of that sensor
     in the query result. The second maps SENSOR_ID to a dictionary holding
     GOOGLELONGITUDE, GOOGLELATITUDE, ALTITUDE and MEASURE_POINT_ID.

     When a SENSOR_ID occurs in several rows, the position map keeps the
     last row position and the coordinate map keeps the values of the first
     row.

     :param flag: whether QC devices are wanted; a truthy value queries with
         argument 1, a falsy value with -1
     :return: list ``[index_to_device, site_to_device]``
     """
     db = DataOperationsByMysql()
     sql_data = db.query_devices_latitude_longitude(1 if flag else -1)

     index_to_device = {}
     site_to_device = {}
     sensor_ids = sql_data['SENSOR_ID'].values
     if len(sensor_ids) == 0:
         return [index_to_device, site_to_device]

     site_columns = ('GOOGLELONGITUDE', 'GOOGLELATITUDE', 'ALTITUDE',
                     'MEASURE_POINT_ID')
     # Pull each column out once so that a row is read by position instead of
     # re-filtering the whole frame for every sensor.
     column_values = {name: sql_data[name].values for name in site_columns}
     for position, sensor_id in enumerate(sensor_ids):
         index_to_device[sensor_id] = position
         if sensor_id not in site_to_device:
             site_to_device[sensor_id] = {
                 name: column_values[name][position]
                 for name in site_columns
             }
     return [index_to_device, site_to_device]
def qc():
    """Run obtain_adjust_data for city 1 at a fixed hour and print timings."""
    started = time.time()
    print('Enter prepare data')
    settings = QualityControlConfig()
    # Previously used hour: '2018-11-26 15:00:00'
    hour = '2018-12-18 21:00:00'
    data_access = DataOperationsByMysql(settings, hour)
    setup_done = time.time()  # taken after setup; not reported anywhere

    # Full list of city groups that can be processed instead of [[1]]:
    # [[2], [197, 492], [149], [201], [202], [203], [204], [205], [206], [208],
    #  [210], [212], [213], [229], [231], [232], [235], [238], [239], [245],
    #  [291], [296], [297], [298], [303], [306], [307], [308], [662], [771]]
    city_groups = [[1]]

    for cityid in city_groups:
        print("城市:{}".format(cityid))
        city_started = time.time()
        routine = QualityControlRoutine(data_access)
        routine.obtain_adjust_data(cityid, hour)
        city_elapsed = time.time() - city_started
        print('城市{} 需要的时间是:{}'.format(cityid, city_elapsed))
    total_elapsed = time.time() - started
    print('Total execution time of QC and transmission is {} seconds'.format(
        total_elapsed))
def test_get_qc_group():
    """Print get_qc_vargroup_by_version_and_var for version '2' and 'PM25'."""
    qc_versions = VargroupQCVersions(DataOperationsByMysql())
    qc_vargroup = qc_versions.get_qc_vargroup_by_version_and_var('2', 'PM25')
    print(qc_vargroup)
def test_verrsions_qc_vargroup():
    """Print the qc_vargroup_by_versions_and_var attribute of VargroupQCVersions."""
    qc_versions = VargroupQCVersions(DataOperationsByMysql())
    mapping = qc_versions.qc_vargroup_by_versions_and_var
    print(mapping)
def test_get_qc_version():
    """Print get_qc_version_by_vargroup_and_var for 'YSRDAQ0700' and 'PM25'."""
    qc_versions = VargroupQCVersions(DataOperationsByMysql())
    qc_version = qc_versions.get_qc_version_by_vargroup_and_var(
        'YSRDAQ0700', 'PM25')
    print(qc_version)
Esempio n. 16
0
                    '城市:%s 时间:%s var:%s %s非质控设备数据可能有异常,原因:该设备测量值%s大于附近设备平均值%s乘以系数%s或者大于'
                    '周围设备,周围设备的测量值为%s' %
                    (self.city, self.hour, var, dev_id, dev_value, dev_mean,
                     concentration_standard, lyst))
            elif dev_value < dev_mean * (1 - concentration_standard):
                logger.debug(
                    '城市:%s 时间:%s var:%s %s非质控设备数据有异常,原因:该设备测量值%s小于附近设备平均值%s乘以系数%s或者小于周围'
                    '设备,周围设备的测量值为%s' %
                    (self.city, self.hour, var, dev_id, dev_value, dev_mean,
                     concentration_standard, lyst))
                return dev_id


# Manual smoke run: checks the PM25 data of city 1 for a fixed hour with
# DataCheck and prints the results.
if __name__ == '__main__':
    from dao.mysql_impl import DataOperationsByMysql
    from config.qc_config import QualityControlConfig
    from utility.neighbor_devices import NeighborDevices
    city_id = [1]
    hour = '2018-11-01 00:00:00'
    config = QualityControlConfig()
    dao = DataOperationsByMysql(config, hour)
    # Active devices of the city; only the unique DEV_ID values are kept.
    device_list_info = dao.query_active_devices_by_city(city_id)
    device_list = device_list_info['DEV_ID'].unique().tolist()
    # NOTE(review): only the device list is passed here; confirm this matches
    # the current NeighborDevices constructor signature.
    spatial_indexer = NeighborDevices(device_list)
    datacheck = DataCheck(dao, config, spatial_indexer, city_id, hour)
    # Org data of QC and non-QC devices, queried separately.
    qc_data = dao.query_qc_dev_org_data_by_city(city_id, hour)
    non_qc_data = dao.query_non_qc_dev_org_data_by_city(city_id, hour)
    print(len(qc_data))
    print(datacheck.qc_data_check(qc_data, 'PM25'))
    print(datacheck.non_qc_data_check(non_qc_data, 'PM25'))
Esempio n. 17
0
class BackCalculation():
    """
    Scenarios that need a back-calculation (re-computation).

    Scenario 1: capture data was missing for a small group of devices, so
            capture -> org -> adjust is recomputed for an explicit device
            list (non-QC devices).
    Scenario 2: a whole city is recomputed; capture -> org and org -> adjust
            are both executed per city (QC and non-QC devices).
    """
    def __init__(self, hour):
        """
        Initialise the interfaces and parameters used by the back-calculation.

        :param hour: forwarded to DataOperationsByMysql together with the
            configuration object
        """
        self.config = QualityControlConfig()
        self.dao = DataOperationsByMysql(self.config, hour)
        self.variables = self.config.get_config_global_data('full_pollutants')
        self.qc_routine = QualityControlRoutine(self.dao)
        self.adjust_df = pd.DataFrame()
        self.interpolate_df = pd.DataFrame()
        # VAR_TYPE_ID -> variable name; used by prepare_adjust_df.
        self.var_type_id_to_var_dict = {
            1: 'PM25',
            2: 'PM10',
            3: 'SO2',
            4: 'CO',
            5: 'NO2',
            6: 'O3',
            10: 'TSP',
            8: 'TVOC'
        }

    def init_agg_by_city_or_by_device_list(self,
                                           hour,
                                           city_id,
                                           device_list=None):
        """
        Create ``self.ac``, the AggregateCapture for the selected devices.

        :param hour: hour whose capture data is loaded
        :param city_id: city ids used to select devices when no device list
            is given
        :param device_list: optional explicit device list; when it is not
            None it is used instead of ``city_id`` to select the devices
        """
        # Vargroup related data.
        channel_df = self.dao.query_channels()
        aq_dict = self.dao.query_aq_type_in_dict()
        vg_c = VargroupChannels(channel_df, aq_dict)

        # Model related data.
        models = self.dao.query_consistency_model()
        # Device information, by device list when one is given, else by city.
        if device_list is not None:
            dev_df = self.dao.query_active_devices_by_device_list(device_list)
        else:
            dev_df = self.dao.query_active_devices_by_city(city_id)
        dfs = self.dao.query_capture_data_by_hour(hour, dev_df)
        self.ac = AggregateCapture(self.config, self.dao, dfs, vg_c, models)

    def execute_back_calculation(self,
                                 hour,
                                 city_id,
                                 is_for_org=False,
                                 var_names=None,
                                 dev_list=None):
        """
        Main entry point of the back-calculation; results are written
        straight to the database.

        :param hour: the hour to recompute
        :param city_id: city ids; must be a list
        :param is_for_org: only used when ``dev_list`` is None; when truthy
            the capture -> org step is recomputed before the QC step
        :param var_names: optional variable names that replace
            ``qc_routine.variables``
        :param dev_list: optional device list; when it is not None the
            device-list scenario is executed, otherwise the city scenario
        :return: None
        """
        print('begin back cal......')
        if dev_list is not None:
            self._back_calculate_by_device_list(hour, city_id, var_names,
                                                dev_list)
        else:
            self._back_calculate_by_city(hour, city_id, is_for_org, var_names)

    def _back_calculate_by_device_list(self, hour, city_id, var_names,
                                       dev_list):
        """Recompute capture -> org -> adjust for an explicit device list."""
        # capture -> org
        self.init_agg_by_city_or_by_device_list(hour,
                                                city_id,
                                                device_list=dev_list)
        self.ac.capture_to_org(hour)

        # org -> adjust
        if var_names:
            self.qc_routine.variables = var_names
        self.qc_routine.qc_variables = self.qc_routine.variables

        # Initialise the spatial indexer of the QC routine.
        self.qc_routine.init_spatial_indexer(city_id)
        # Initialise the min/max of each parameter for the QC'ed data.
        self.qc_routine.init_qc_data_min_max()
        # Load the adjust data already stored in the database, with the
        # variable name resolved from VAR_TYPE_ID.
        adjust_df_from_db = self.prepare_adjust_df(hour)
        self.qc_routine.execute_transmission_by_city(hour,
                                                     city_id,
                                                     dev_list=dev_list)

        # De-duplicate per (DEV_ID, VAR). The freshly computed rows are
        # concatenated first, so GroupBy.first() takes their values before
        # those read from the database (first() picks the first non-null
        # entry of each column).
        adjust_df_all = pd.concat(
            [self.qc_routine.all_adjust_df[1], adjust_df_from_db], axis=0)
        adjust_df_all = adjust_df_all.groupby(['DEV_ID', 'VAR']).first()
        adjust_df_all.reset_index(inplace=True)
        self.qc_routine.adjust_df_full = adjust_df_all

        # Review the existing adjust data.
        self.qc_routine.init_check(city_id, hour)
        self.qc_routine.execute_adj_data_censor(dev_list=dev_list)

        # Interpolation.
        self.qc_routine.execute_interpolate_by_city(hour,
                                                    city_id,
                                                    dev_list=dev_list)

        # Persist every non-empty adjust frame.
        for adjust_df in self.qc_routine.all_adjust_df.values():
            if not adjust_df.empty:
                self.dao.write_adjust_data(adjust_df, hour)

    def _back_calculate_by_city(self, hour, city_id, is_for_org, var_names):
        """Recompute a whole city, optionally including capture -> org."""
        if is_for_org:
            # Run capture -> org for the city; otherwise this step is skipped.
            self.init_agg_by_city_or_by_device_list(hour, city_id=city_id)
            self.ac.capture_to_org(hour)
        # Quality-control the devices of the city.
        if var_names:
            self.qc_routine.variables = var_names
        self.qc_routine.obtain_adjust_data(city_id, hour)

    def prepare_adjust_df(self, hour):
        """
        Load the stored adjust data of ``qc_routine.device_list`` for ``hour``
        and add a ``VAR`` column resolved from ``VAR_TYPE_ID``.

        :param hour: used as both the start and the end of the queried range
        :return: the queried DataFrame with the additional ``VAR`` column
        :raises KeyError: if a VAR_TYPE_ID is not in var_type_id_to_var_dict
        """
        adjust_df = self.dao.query_adj_data_by_device_list(
            self.qc_routine.device_list, hour, hour)
        var_by_type_id = self.var_type_id_to_var_dict
        # Series.map also works on an empty result; the row-wise
        # DataFrame.apply returns a DataFrame in that case, which makes the
        # column assignment fail. The explicit lookup keeps the KeyError for
        # unknown type ids.
        adjust_df['VAR'] = adjust_df['VAR_TYPE_ID'].map(
            lambda type_id: var_by_type_id[type_id])
        return adjust_df
class QualityControlRoutineByMinute():
    """
    Quality control on minute-level data for cities such as Shanxi / Shunyi;
    only the transmission step is performed, no interpolation.
    """
    def __init__(self, hour_minute):
        """
        Initialise the interfaces and parameters used by the minute-level QC.

        :param hour_minute: forwarded to DataOperationsByMysql together with
            the configuration object
        """
        self.config = QualityControlConfig()
        self.dao = DataOperationsByMysql(self.config, hour_minute)
        self.variables = self.config.get_config_global_data('full_pollutants')
        self.qc_routine = QualityControlRoutine(self.dao)
        # Read from the 'save_path_for_by_minute_qc' setting; change it to
        # match the actual deployment.
        self.dir = self.config.get_config_global_data(
            'save_path_for_by_minute_qc')

    def init_agg_by_city_and_by_hour_minute(self, hour_minute, city_id):
        """
        Create ``self.dev_df`` (active devices of the city) and ``self.ac``
        (the AggregateCapture built from the minute-level capture data).

        :param hour_minute: time passed to query_capture_data_by_minute
        :param city_id: city ids used to select the active devices
        """
        # Vargroup related data.
        channel_df = self.dao.query_channels()
        aq_dict = self.dao.query_aq_type_in_dict()
        vg_c = VargroupChannels(channel_df, aq_dict)

        # Model related data.
        models = self.dao.query_consistency_model()
        # Device information.
        self.dev_df = self.dao.query_active_devices_by_city(city_id)
        # TODO (from the original note): the interface should be switched to
        # fetch the capture data of the preceding 17.5 minutes.
        dfs = self.dao.query_capture_data_by_minute(hour_minute, self.dev_df)
        self.ac = AggregateCapture(self.config, self.dao, dfs, vg_c, models)

    def execute_quality_control_by_minute(self, hour_minute, city_id):
        """
        Entry point of the minute-level quality control.

        Runs capture -> org, then the transmission step, and writes the
        resulting adjust frame as a CSV file below ``self.dir``. Nothing is
        written when the adjust frame is empty.

        :param hour_minute: the time to process
        :param city_id: city ids to process
        :return: None
        """
        print('begin by minute......')

        # Minute-level capture -> org computation.
        self.init_agg_by_city_and_by_hour_minute(hour_minute, city_id)
        org_dict = self.ac.capture_to_org(hour_minute, is_for_minute=True)
        # Convert the in-memory org data to the layout used when org data of
        # non-QC devices is queried.
        org_df = self.prepare_org_df(org_dict)
        self.qc_routine.qc_variables = self.qc_routine.variables

        # Initialise the spatial indexer of the QC routine.
        self.qc_routine.init_spatial_indexer(city_id)
        # Initialise the min/max of each parameter for the QC'ed data.
        self.qc_routine.init_qc_data_min_max()
        self.qc_routine.execute_transmission_by_city(hour_minute,
                                                     city_id,
                                                     is_for_minute=True,
                                                     org_df=org_df)

        # Store the result below the configured directory.
        if self.qc_routine.all_adjust_df[1].empty:
            logger.warning('该分钟级别的质控,没有出数!')
            return

        self.qc_routine.all_adjust_df[1] = self.qc_routine.set_min_and_max(
            self.qc_routine.all_adjust_df[1])
        save_path = '{}/{}'.format(self.dir, fu.get_save_path(hour_minute))
        save_name = fu.get_csv_name(hour_minute)
        # exist_ok avoids failing when the directory is created by someone
        # else between an existence check and the creation.
        os.makedirs(save_path, exist_ok=True)
        self.qc_routine.all_adjust_df[1].to_csv('{}/{}'.format(
            save_path, save_name),
                                                index=False)

    def prepare_org_df(self, org_dict):
        """
        Convert the in-memory org data to the layout used when org data of
        non-QC devices is queried, keeping only the non-QC devices.

        Reads ``self.dev_df`` to attach the device information.

        :param org_dict: mapping whose values are org DataFrames
        :return: DataFrame restricted to the expected columns; rows are the
            devices whose RELATE_SITE_ID is -1
        """
        # Later entries of org_dict come first, as with the previous
        # prepend-in-a-loop approach, but with a single concatenation.
        frames = [org_dict[key] for key in org_dict]
        if frames:
            org_df = pd.concat(frames[::-1], axis=0)
        else:
            org_df = pd.DataFrame()
        org_df.rename(columns={'CAL_TIME': 'TIMESTAMP'}, inplace=True)
        org_df.reset_index(drop=True, inplace=True)
        # Attach the device information (adds e.g. the vargroup id).
        org_df = org_df.merge(self.dev_df, on=['DEV_ID'], how='left')
        # Keep the non-QC devices only.
        non_qc_df = org_df[org_df['RELATE_SITE_ID'] == -1].copy()
        # Columns of the emulated layout; columns that are missing from the
        # data are created by concatenating with an empty template frame.
        need_columns = [
            'DEV_ID', 'PM25', 'PM10', 'SO2', 'CO', 'NO2', 'O3', 'TVOC', 'TSP',
            'HUMIDITY', 'TEMPERATURE', 'TIMESTAMP', 'SITE_ID', 'COUNT_PM25',
            'COUNT_PM10', 'COUNT_SO2', 'COUNT_CO', 'COUNT_NO2', 'COUNT_O3',
            'COUNT_TVOC', 'COUNT_TSP', 'VARGROUP_ID'
        ]
        example = pd.DataFrame(columns=need_columns)
        non_qc_df = pd.concat([example, non_qc_df], axis=0)
        non_qc_df = non_qc_df[need_columns].copy()
        return non_qc_df