def insert_new_data(data, darksky_obj):
    """Bulk-insert Dark Sky weather records into the given ORM table."""
    new_data = []
    for item in data:
        obj = darksky_obj(gid=item['gid'],
                          timestamp=item['timestamp'],
                          summary=item.get('summary'),
                          icon=item.get('icon'),
                          precip_intensity=item.get('precipIntensity'),
                          precip_probability=item.get('precipProbability'),
                          temperature=item.get('temperature'),
                          apparent_temperature=item.get('apparentTemperature'),
                          dew_point=item.get('dewPoint'),
                          humidity=item.get('humidity'),
                          pressure=item.get('pressure'),
                          wind_speed=item.get('windSpeed'),
                          wind_bearing=item.get('windBearing'),
                          cloud_cover=item.get('cloudCover'),
                          uv_index=item.get('uvIndex'),
                          visibility=item.get('visibility'),
                          ozone=item.get('ozone'))
        new_data.append(obj)
    session.add_all(new_data)
    session.commit()

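# Illustrative only: a sketch of the record shape insert_new_data() expects. The keys
# follow the Dark Sky field names mapped above; the values and the `LosAngelesDarkSky`
# class name are placeholders, not part of the original pipeline.
#
# sample_weather = [{
#     'gid': 1,
#     'timestamp': datetime(2020, 1, 1, 0, 0),
#     'summary': 'Clear',
#     'icon': 'clear-night',
#     'temperature': 61.3,
#     'humidity': 0.72,
#     'windSpeed': 3.1,
#     'windBearing': 210,
# }]
# insert_new_data(sample_weather, LosAngelesDarkSky)
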
def insert_ppa_data(data, data_table_obj):
    """Bulk-insert particulate-sensor readings, skipping empty records and records without a sensor id."""
    data_obj = []
    for item in data:
        if len(item) == 0 or item.get('sensor_id') is None:
            continue
        obj = data_table_obj(
            sensor_id=item.get('sensor_id'),
            channel=item.get('channel'),
            timestamp=item.get('timestamp'),
            pm1_atm=item.get('pm1_atm'),
            pm2_5_atm=item.get('pm2_5_atm'),
            pm10_atm=item.get('pm10_atm'),
            pm1_cf_1=item.get('pm1_cf_1'),
            pm2_5_cf_1=item.get('pm2_5_cf_1'),
            pm10_cf_1=item.get('pm10_cf_1'),
            # field mapping used before 2019/10/20:
            # pm1_atm=item.get('pm1_cf_1'),
            # pm2_5_atm=item.get('pm2_5_cf_1'),
            # pm10_atm=item.get('pm10_cf_1'),
            # pm1_cf_1=item.get('pm1_atm'),
            # pm2_5_cf_1=item.get('pm2_5_atm'),
            # pm10_cf_1=item.get('pm10_atm'),
            p_0_3um_cnt=item.get('p_0_3um_cnt'),
            p_0_5um_cnt=item.get('p_0_5um_cnt'),
            p_1_0um_cnt=item.get('p_1_0um_cnt'),
            p_2_5um_cnt=item.get('p_2_5um_cnt'),
            p_5um_cnt=item.get('p_5um_cnt'),
            p_10um_cnt=item.get('p_10um_cnt'),
            rssi=item.get('rssi'),
            temperature=item.get('temperature'),
            humidity=item.get('humidity'))
        data_obj.append(obj)
    session.add_all(data_obj)
    session.commit()

def main(config):
    """Aggregate raw sensor readings into hourly per-(sensor_id, channel) averages and insert them into the target table."""
    start_time = config['START_TIME']
    end_time = config['END_TIME']
    tz = pytz.timezone('America/Los_Angeles')

    # build an hourly, timezone-aware time axis
    time_list = pd.date_range(start=start_time, end=end_time, freq='H')
    time_list = [tz.localize(x) for x in time_list]

    table_obj = config['TABLE']
    new_table_obj = config['NEW_TABLE']
    fields = ['pm1_atm', 'pm2_5_atm', 'pm10_atm',
              'pm1_cf_1', 'pm2_5_cf_1', 'pm10_cf_1',
              'p_0_3um_cnt', 'p_0_5um_cnt', 'p_1_0um_cnt',
              'p_2_5um_cnt', 'p_5um_cnt', 'p_10um_cnt',
              'rssi', 'temperature', 'humidity']

    def preprocessing(x):
        # keep only values within one standard deviation of the column mean, then average
        x_mean, x_std = x.mean(skipna=True), x.std(skipna=True)
        x_left, x_right = x_mean - x_std, x_mean + x_std
        new_x = ((x >= x_left) & (x <= x_right)) * x
        new_x = new_x.replace({0: np.nan})
        return new_x.mean(skipna=True)

    for i, t in enumerate(time_list[:-1]):
        sql_statement = session.query(table_obj).filter(
            table_obj.timestamp >= time_list[i],
            table_obj.timestamp < time_list[i + 1])
        df = pd.read_sql(sql_statement.statement, session.bind)[['sensor_id', 'channel'] + fields]
        if len(df) == 0:
            continue

        agg_df = df.groupby(['sensor_id', 'channel']).apply(lambda x: preprocessing(x[fields])).round(5)
        agg_df = agg_df[fields].reset_index()
        agg_df = agg_df.replace({np.nan: None})

        agg_data = []
        for _, row in agg_df.iterrows():
            agg_data_obj = new_table_obj(
                sensor_id=row['sensor_id'],
                channel=row['channel'],
                timestamp=time_list[i],
                pm1_atm=row['pm1_atm'],
                pm2_5_atm=row['pm2_5_atm'],
                pm10_atm=row['pm10_atm'],
                pm1_cf_1=row['pm1_cf_1'],
                pm2_5_cf_1=row['pm2_5_cf_1'],
                pm10_cf_1=row['pm10_cf_1'],
                p_0_3um_cnt=row['p_0_3um_cnt'],
                p_0_5um_cnt=row['p_0_5um_cnt'],
                p_1_0um_cnt=row['p_1_0um_cnt'],
                p_2_5um_cnt=row['p_2_5um_cnt'],
                p_5um_cnt=row['p_5um_cnt'],
                p_10um_cnt=row['p_10um_cnt'],
                rssi=row['rssi'],
                temperature=row['temperature'],
                humidity=row['humidity'])
            agg_data.append(agg_data_obj)

        session.add_all(agg_data)
        session.commit()
    print('Finish one table.')

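# Illustrative only: a sketch of the config dict main() expects. The key names come from
# the lookups above; `PurpleAirData` and `PurpleAirDataHourly` are hypothetical ORM
# classes standing in for this project's raw and hourly-aggregate tables.
#
# config = {
#     'START_TIME': '2020-01-01 00:00:00',
#     'END_TIME': '2020-01-02 00:00:00',
#     'TABLE': PurpleAirData,            # raw readings (source)
#     'NEW_TABLE': PurpleAirDataHourly,  # hourly aggregates (destination)
# }
# main(config)
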
def insert_new_station_id(new_station_id, lon, lat):
    point = 'SRID=4326;POINT({} {})'.format(lon, lat)
    obj = LosAngelesEPALocation(station_id=new_station_id, lon=lon, lat=lat,
                                location=point, elevation=0.0)
    session.add(obj)
    session.commit()

def insert_new_air_quality_data(air_quality_data):
    for item in air_quality_data:
        obj = LosAngelesEPA(station_id=item['station_id'],
                            date_observed=item['date_observed'],
                            parameter_name=item['parameter_name'],
                            concentration=item['concentration'],
                            unit=item['unit'],
                            aqi=item['aqi'],
                            category_number=item['category_number'])
        session.add(obj)
    session.commit()

def construct_geo_vector(**kwargs):
    """Build one geo feature vector per location (gid): feature values aligned to the full feature-name list, with lon/lat appended."""
    geo_feature_obj = kwargs['geo_feature_obj']
    coord_obj = kwargs['coord_obj']
    geo_vector_obj = kwargs['geo_vector_obj']
    geo_name_obj = kwargs['geo_name_obj']

    locations = sorted([i[0] for i in session.query(coord_obj.gid).all()])
    geo_name_df = pd.read_sql(session.query(geo_name_obj.name).statement, session.bind)

    try:
        for loc in locations:
            geo_data_sql = session.query(
                geo_feature_obj.value,
                func.concat(geo_feature_obj.geo_feature, '_', geo_feature_obj.feature_type).label('name')) \
                .filter(geo_feature_obj.gid == loc).statement
            geo_data_df = pd.read_sql(geo_data_sql, session.bind)

            # align feature values with the full feature-name list; missing features become 0.0
            geo_data = geo_name_df.merge(geo_data_df, on='name', how='left')
            geo_data = geo_data['value'].fillna(0.0)

            coord = session.query(coord_obj.lon, coord_obj.lat).filter(coord_obj.gid == loc).first()

            obj_result = geo_vector_obj(gid=loc, data=list(geo_data) + list(coord))
            session.add(obj_result)
            session.commit()

            if loc % 1000 == 0:
                print('Geo Vector {} has finished.'.format(loc))

        # adding lon, lat into geo feature names
        obj_results = [
            geo_name_obj(name='lon', geo_feature='location', feature_type='lon'),
            geo_name_obj(name='lat', geo_feature='location', feature_type='lat')
        ]
        # session.add_all(obj_results)
        # session.commit()
        return
    except Exception as e:
        print(e)
        exit(-1)

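# Illustrative only: construct_geo_vector() is driven entirely by keyword arguments, one
# ORM class per table. `GeoFeature`, `Coordinate`, `GeoVector`, and `GeoFeatureName` are
# hypothetical class names; substitute the mapped classes defined for this project.
#
# construct_geo_vector(geo_feature_obj=GeoFeature,
#                      coord_obj=Coordinate,
#                      geo_vector_obj=GeoVector,
#                      geo_name_obj=GeoFeatureName)
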
def insert_locations(data, location_table_obj):
    data_obj = []
    for item in data:
        obj = location_table_obj(
            sensor_id=item.get('sensor_id'),
            parent_id=item.get('parent_id'),
            channel=item.get('channel'),
            label=item.get('label'),
            device_location_type=item.get('device_location_type'),
            thingspeak_primary_id=item.get('thingspeak_primary_id'),
            thingspeak_primary_id_read_key=item.get('thingspeak_primary_id_read_key'),
            thingspeak_second_id=item.get('thingspeak_second_id'),
            thingspeak_second_id_read_key=item.get('thingspeak_second_id_read_key'),
            lon=item.get('lon'),
            lat=item.get('lat'),
            location='SRID=4326;POINT({} {})'.format(item.get('lon'), item.get('lat')))
        data_obj.append(obj)
    session.add_all(data_obj)
    session.commit()

def interpolate_time(old_obj, target_obj, time_list, features):
    """Linearly interpolate each location's time series onto time_list and insert the results into target_obj."""
    try:
        time_df = pd.DataFrame(time_list, columns=['timestamp']).set_index(['timestamp'])
        locations = session.query(old_obj.gid).distinct(old_obj.gid).all()
        locations = sorted([loc[0] for loc in locations])

        for loc in locations:
            data = session.query(old_obj.timestamp, *features).filter(old_obj.gid == loc) \
                .order_by(old_obj.timestamp).all()
            df = pd.DataFrame(data, columns=['timestamp'] + features).set_index(['timestamp'])
            df = df.loc[~df.index.duplicated(keep='first')]  # remove potential duplicates in the index
            df = df.join(time_df, how='right').sort_index()
            # df['wind_bearing'] = df['wind_bearing'].apply(lambda x: x - 360 if x > 180 else x)
            inter_data = df.interpolate(method='linear').reset_index()

            obj_results = [
                target_obj(gid=loc, timestamp=dt[0], data=dt[1:])
                for dt in inter_data.values.tolist()
            ]
            session.add_all(obj_results)
            session.commit()
            print('Location {} has finished. {} records have been generated.'.format(loc, len(inter_data)))
        return
    except Exception as e:
        print(e)
        exit(-1)
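
# Illustrative only: a sketch of wiring interpolate_time() to an hourly, timezone-aware
# time axis, mirroring the axis built in main() above. `LosAngelesDarkSky` and
# `LosAngelesDarkSkyHourly` are hypothetical source/target ORM classes; the feature list
# reuses column names from insert_new_data() above.
#
# tz = pytz.timezone('America/Los_Angeles')
# hourly = [tz.localize(t) for t in pd.date_range('2020-01-01', '2020-01-07', freq='H')]
# weather_features = ['temperature', 'humidity', 'pressure', 'wind_speed', 'wind_bearing']
# interpolate_time(LosAngelesDarkSky, LosAngelesDarkSkyHourly, hourly, weather_features)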