예제 #1
0
def insert_new_data(data, darksky_obj):
    """Create one ``darksky_obj`` row per item of ``data`` and commit them together.

    Args:
        data: Iterable of mappings. ``'gid'`` and ``'timestamp'`` are read with
            ``item[...]`` and so must be present; every other key is read with
            ``item.get`` and becomes ``None`` when it is absent.
        darksky_obj: Callable accepting the keyword arguments built below
            (presumably a mapped table class -- confirm against the caller).

    Raises:
        KeyError: If an item has no ``'gid'`` or no ``'timestamp'`` key.
    """
    # (keyword argument of darksky_obj, key looked up in each item)
    optional_keys = (
        ('summary', 'summary'),
        ('icon', 'icon'),
        ('precip_intensity', 'precipIntensity'),
        ('precip_probability', 'precipProbability'),
        ('temperature', 'temperature'),
        ('apparent_temperature', 'apparentTemperature'),
        ('dew_point', 'dewPoint'),
        ('humidity', 'humidity'),
        ('pressure', 'pressure'),
        ('wind_speed', 'windSpeed'),
        ('wind_bearing', 'windBearing'),
        ('cloud_cover', 'cloudCover'),
        ('uv_index', 'uvIndex'),
        ('visibility', 'visibility'),
        ('ozone', 'ozone'),
    )

    rows = [
        darksky_obj(gid=item['gid'],
                    timestamp=item['timestamp'],
                    **{column: item.get(key) for column, key in optional_keys})
        for item in data
    ]

    # All rows are staged first and written with a single commit.
    session.add_all(rows)
    session.commit()
예제 #2
0
def insert_ppa_data(data, data_table_obj):
    """Stage one ``data_table_obj`` row per usable item and commit them together.

    An item is skipped when it is empty (``len(item) == 0``) or when its
    ``'sensor_id'`` is missing or ``None``.

    Args:
        data: Iterable of mappings; each column below is read with
            ``item.get`` under the key of the same name, so absent keys
            become ``None``.
        data_table_obj: Callable accepting the column names below as keyword
            arguments.
    """
    # Each column is filled from the item key of the same name.
    # NOTE: a disabled variant of this mapping, annotated "this is before
    # 2019/10/20", crossed the two PM families instead (pm*_atm was read from
    # 'pm*_cf_1' and pm*_cf_1 from 'pm*_atm').
    column_names = (
        'sensor_id', 'channel', 'timestamp',
        'pm1_atm', 'pm2_5_atm', 'pm10_atm',
        'pm1_cf_1', 'pm2_5_cf_1', 'pm10_cf_1',
        'p_0_3um_cnt', 'p_0_5um_cnt', 'p_1_0um_cnt',
        'p_2_5um_cnt', 'p_5um_cnt', 'p_10um_cnt',
        'rssi', 'temperature', 'humidity',
    )

    rows = [
        data_table_obj(**{name: item.get(name) for name in column_names})
        for item in data
        if len(item) != 0 and item.get('sensor_id') is not None
    ]

    session.add_all(rows)
    session.commit()
def _trimmed_mean(frame):
    """Return, per column, the mean of the values within one std of the column mean.

    Args:
        frame: DataFrame of numeric readings belonging to one group.

    Returns:
        Series indexed by column name; NaN for a column with no usable value.
    """
    col_mean = frame.mean(skipna=True)
    # A group holding a single reading has an undefined (NaN) sample std.
    # Treating it as 0 keeps that lone reading instead of masking it out.
    col_std = frame.std(skipna=True).fillna(0.0)
    in_range = (frame >= col_mean - col_std) & (frame <= col_mean + col_std)
    # ``where`` blanks only the out-of-range cells, so in-range readings that
    # are exactly 0 still contribute to the mean.
    return frame.where(in_range).mean(skipna=True)


def main(config):
    """Aggregate raw sensor rows into hourly rows of another table.

    The span from ``START_TIME`` to ``END_TIME`` is cut into hourly windows
    localized to ``America/Los_Angeles``. For each window ``[start, end)`` the
    rows of ``TABLE`` are grouped by ``(sensor_id, channel)``, every
    measurement column is reduced with ``_trimmed_mean`` and rounded to 5
    decimals, and one ``NEW_TABLE`` row stamped with the window start is
    written per group. Each window is committed separately; windows without
    source rows are skipped.

    Args:
        config: Mapping providing ``'START_TIME'`` and ``'END_TIME'`` (passed
            to ``pd.date_range``), ``'TABLE'`` (queried through its
            ``timestamp`` attribute) and ``'NEW_TABLE'`` (called with keyword
            arguments to build the aggregated rows).
    """
    start_time = config['START_TIME']
    end_time = config['END_TIME']
    tz = pytz.timezone('America/Los_Angeles')
    time_list = pd.date_range(start=start_time, end=end_time, freq='H')
    time_list = [tz.localize(x) for x in time_list]
    table_obj = config['TABLE']
    new_table_obj = config['NEW_TABLE']

    key_columns = ['sensor_id', 'channel']
    fields = ['pm1_atm', 'pm2_5_atm', 'pm10_atm', 'pm1_cf_1', 'pm2_5_cf_1', 'pm10_cf_1', 'p_0_3um_cnt', 'p_0_5um_cnt',
              'p_1_0um_cnt', 'p_2_5um_cnt', 'p_5um_cnt', 'p_10um_cnt', 'rssi', 'temperature', 'humidity']

    # Consecutive timestamps delimit one half-open window each.
    for window_start, window_end in zip(time_list, time_list[1:]):

        query = session.query(table_obj).filter(table_obj.timestamp >= window_start,
                                                table_obj.timestamp < window_end)
        df = pd.read_sql(query.statement, session.bind)[key_columns + fields]

        if df.empty:
            continue

        agg_df = df.groupby(key_columns).apply(lambda group: _trimmed_mean(group[fields])).round(5)
        agg_df = agg_df[fields].reset_index()
        # NaN is swapped for None before the values are handed to the table class.
        agg_df = agg_df.replace({np.nan: None})

        agg_data = [
            new_table_obj(sensor_id=row['sensor_id'],
                          channel=row['channel'],
                          timestamp=window_start,
                          **{field: row[field] for field in fields})
            for _, row in agg_df.iterrows()
        ]
        session.add_all(agg_data)
        session.commit()
    print('Finish one table.')
예제 #4
0
def insert_new_station_id(new_station_id, lon, lat):
    """Add a single ``LosAngelesEPALocation`` row for a station and commit it.

    Args:
        new_station_id: Value stored as ``station_id``.
        lon: Value stored as ``lon`` and used as the first POINT coordinate.
        lat: Value stored as ``lat`` and used as the second POINT coordinate.
    """
    station_fields = {
        'station_id': new_station_id,
        'lon': lon,
        'lat': lat,
        # Point text with an explicit SRID prefix, longitude first.
        'location': 'SRID=4326;POINT({} {})'.format(lon, lat),
        # Elevation is not an input here; 0.0 is always stored.
        'elevation': 0.0,
    }
    session.add(LosAngelesEPALocation(**station_fields))
    session.commit()
def insert_new_air_quality_data(air_quality_data):
    """Insert air-quality observations as ``LosAngelesEPA`` rows in one transaction.

    All rows are built first and written with a single commit, so the batch is
    stored completely or not at all. Nothing is committed for empty input.

    Args:
        air_quality_data: Iterable of mappings, each providing the keys
            ``'station_id'``, ``'date_observed'``, ``'parameter_name'``,
            ``'concentration'``, ``'unit'``, ``'aqi'`` and
            ``'category_number'``.

    Raises:
        KeyError: If an item lacks one of the keys above; raised before
            anything is added to the session.
        Exception: Any error raised by ``session.commit()`` is re-raised after
            the session has been rolled back.
    """
    rows = [
        LosAngelesEPA(station_id=item['station_id'],
                      date_observed=item['date_observed'],
                      parameter_name=item['parameter_name'],
                      concentration=item['concentration'],
                      unit=item['unit'],
                      aqi=item['aqi'],
                      category_number=item['category_number'])
        for item in air_quality_data
    ]
    if not rows:
        return

    session.add_all(rows)
    try:
        session.commit()
    except Exception:
        # Leave the shared session usable for the caller before propagating.
        session.rollback()
        raise
def construct_geo_vector(**kwargs):
    """Build and store one feature vector per location.

    For every ``gid`` found in ``coord_obj`` (processed in ascending order),
    the location's feature values are aligned to the names listed in
    ``geo_name_obj`` (names without a value for that location become 0.0),
    the location's ``lon`` and ``lat`` are appended, and the result is stored
    as ``geo_vector_obj(gid=..., data=[...])``. Each location is committed on
    its own.

    Keyword Args:
        geo_feature_obj: Source of ``gid``, ``value``, ``geo_feature`` and
            ``feature_type``; a feature's name is
            ``<geo_feature>_<feature_type>``.
        coord_obj: Source of ``gid``, ``lon`` and ``lat``.
        geo_vector_obj: Callable building the stored row from ``gid`` and
            ``data``.
        geo_name_obj: Source of the ``name`` column that fixes which features
            appear in the vector.

    Raises:
        KeyError: If one of the keyword arguments above is missing.
        SystemExit: With code -1 after printing the error, if anything fails
            while the vectors are being built or stored.
    """
    # Function-scope import: this block cannot see or edit the file's import
    # section.
    import sys

    geo_feature_obj = kwargs['geo_feature_obj']
    coord_obj = kwargs['coord_obj']
    geo_vector_obj = kwargs['geo_vector_obj']
    geo_name_obj = kwargs['geo_name_obj']

    locations = sorted(row[0] for row in session.query(coord_obj.gid).all())
    geo_name_df = pd.read_sql(
        session.query(geo_name_obj.name).statement, session.bind)

    try:
        # The labelled name expression is identical for every location.
        feature_name = func.concat(
            geo_feature_obj.geo_feature, '_', geo_feature_obj.feature_type).label('name')

        for loc in locations:

            geo_data_sql = session.query(geo_feature_obj.value, feature_name) \
                .filter(geo_feature_obj.gid == loc).statement
            geo_data_df = pd.read_sql(geo_data_sql, session.bind)

            # The left merge keeps the row order of geo_name_df; names with no
            # value at this location are filled with 0.0.
            geo_data = geo_name_df.merge(geo_data_df, on='name', how='left')
            geo_data = geo_data['value'].fillna(0.0)

            coord = session.query(
                coord_obj.lon,
                coord_obj.lat).filter(coord_obj.gid == loc).first()
            vector = list(geo_data) + list(coord)

            session.add(geo_vector_obj(gid=loc, data=vector))
            session.commit()

            if loc % 1000 == 0:
                # NOTE(review): the value printed is the vector length, not
                # the gid -- confirm that this is the intended progress output.
                print('Geo Vector {} has finished.'.format(len(vector)))

        # The names table is not extended with the two trailing coordinates
        # here. The step that would register them was left disabled:
        #   session.add_all([
        #       geo_name_obj(name='lon', geo_feature='location', feature_type='lon'),
        #       geo_name_obj(name='lat', geo_feature='location', feature_type='lat')])
        #   session.commit()

    except Exception as e:
        print(e)
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (for example under ``python -S``).
        sys.exit(-1)
예제 #7
0
def insert_locations(data, location_table_obj):
    """Stage one ``location_table_obj`` row per item and commit them together.

    Args:
        data: Iterable of mappings; every column is read with ``item.get``, so
            absent keys become ``None``.
        location_table_obj: Callable accepting the columns below plus
            ``location`` as keyword arguments.
    """
    # Columns copied from the item key of the same name.
    copied_keys = (
        'sensor_id',
        'parent_id',
        'channel',
        'label',
        'device_location_type',
        'thingspeak_primary_id',
        'thingspeak_primary_id_read_key',
        'thingspeak_second_id',
        'thingspeak_second_id_read_key',
        'lon',
        'lat',
    )

    def build_row(item):
        values = {key: item.get(key) for key in copied_keys}
        # NOTE(review): when 'lon' or 'lat' is absent the formatted text is
        # 'SRID=4326;POINT(None None)' -- confirm callers always supply both.
        values['location'] = 'SRID=4326;POINT({} {})'.format(item.get('lon'), item.get('lat'))
        return location_table_obj(**values)

    session.add_all([build_row(item) for item in data])
    session.commit()
def interpolate_time(old_obj, target_obj, time_list, features):
    """Resample each location's features onto ``time_list`` and store the result.

    For every distinct ``gid`` of ``old_obj`` (in ascending order) the feature
    columns are loaded ordered by timestamp, duplicate timestamps are dropped
    (first kept), the rows are right-joined onto ``time_list`` and the gaps
    are filled with ``DataFrame.interpolate(method='linear')``. One
    ``target_obj`` row is written per timestamp, with the feature values
    stored as a list in ``data``. Each location is committed on its own.

    Args:
        old_obj: Source providing ``gid`` and ``timestamp`` attributes.
        target_obj: Callable building a stored row from ``gid``, ``timestamp``
            and ``data``.
        time_list: Timestamps forming the output time axis.
        features: Sequence of columns passed to ``session.query`` after the
            timestamp; any iterable is accepted and it is read once.

    Raises:
        SystemExit: With code -1 after printing the error, if anything fails.
    """
    # Function-scope import: this block cannot see or edit the file's import
    # section.
    import sys

    try:
        # Materialize once so tuples and other iterables work both in the
        # query and in the column list below.
        features = list(features)

        time_df = pd.DataFrame(time_list,
                               columns=['timestamp']).set_index(['timestamp'])
        locations = session.query(old_obj.gid).distinct(old_obj.gid).all()
        locations = sorted(loc[0] for loc in locations)

        for loc in locations:

            data = session.query(old_obj.timestamp, *features).filter(old_obj.gid == loc)\
                .order_by(old_obj.timestamp).all()

            df = pd.DataFrame(data, columns=['timestamp'] +
                              features).set_index(['timestamp'])
            # Remove potential duplicates in the index before joining.
            df = df.loc[~df.index.duplicated(keep='first')]
            # The right join keeps exactly the timestamps of time_list: source
            # rows at other timestamps are dropped, and missing ones become
            # NaN rows for the interpolation to fill.
            df = df.join(time_df, how='right').sort_index()

            inter_data = df.interpolate(method='linear').reset_index()

            # After reset_index each row is [timestamp, feature values...].
            obj_results = [
                target_obj(gid=loc, timestamp=dt[0], data=dt[1:])
                for dt in inter_data.values.tolist()
            ]
            session.add_all(obj_results)
            session.commit()

            print('Location {} has finished. {} records has been generated.'.
                  format(loc, len(inter_data)))

    except Exception as e:
        print(e)
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (for example under ``python -S``).
        sys.exit(-1)