def pp_construct_data_dict(args):
    (sensor_uuid, sensor_reading, time_slots,
     ans_start_t, ans_end_t, timelet_inv) = args
    log.info('sampling sensor uuid ' + sensor_uuid)
    log.info('-' * 20)

    len_time_slots = len(time_slots)
    ret = None

    # sensor value is read by time
    dict_sensor_val, dict_stime, utc_t, val = \
        get_val_timelet(sensor_reading, time_slots,
                        ans_start_t, ans_end_t, timelet_inv)

    if dict_sensor_val == -1:
        log.debug('append purge list: dict_sensor_val=-1 ' + sensor_uuid)
        # return an empty list to indicate that this uuid has to be purged
        ret = (sensor_uuid, [])
    elif len(utc_t) < len_time_slots:
        log.debug('append purge list: len(utc_t) < len_time_slots ' + sensor_uuid)
        ret = (sensor_uuid, [])
    elif len(val) < len_time_slots:
        log.debug('append purge list: len(val) < len_time_slots ' + sensor_uuid)
        ret = (sensor_uuid, [])
    else:
        # Convert lists to arrays only for kept sensors; doing this before
        # the checks would raise a TypeError when dict_sensor_val is the
        # scalar -1.
        dict_sensor_val_temp = np.array([np.asarray(v) for v in dict_sensor_val])
        dict_stime_temp = np.array([np.asarray(t) for t in dict_stime])
        utc_t_val_temp = np.asarray([utc_t, val])
        ret = (sensor_uuid, [dict_stime_temp, dict_sensor_val_temp, utc_t_val_temp])

    return ret
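# A hedged sketch (not from the source) of the worker's contract: it is
# meant to be mapped over one argument tuple per sensor and returns
# (uuid, []) for a sensor to purge, or (uuid, [stime, values, utc_t_val])
# for one to keep, exactly as the parallel branch of construct_data_dict()
# below consumes it. This serial driver is illustrative only.
def _demo_pp_construct_data_dict(sensor_data, time_slots,
                                 ans_start_t, ans_end_t, timelet_inv):
    keep, purge = dict(), list()
    for uuid, reading in sensor_data.iteritems():
        uuid, timed_value = pp_construct_data_dict(
            (uuid, reading, time_slots, ans_start_t, ans_end_t, timelet_inv))
        if len(timed_value):
            keep[uuid] = timed_value
        else:
            purge.append(uuid)
    return keep, purge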
# Quasar-backed reader: fetch the raw (timestamp, value) series per sensor.
def read_sensor_data(sensor_hash, start_time, end_time):
    from log_util import log

    sensor_data = dict()
    for stitle, uid in sensor_hash.iteritems():
        tsvals = read_quasar_url(uid, start_time, end_time)
        if tsvals is None or len(tsvals) == 0:
            log.critical(stitle + " (" + uid + ") is unavailable from "
                         + str(start_time) + " to " + str(end_time))
        else:
            log.debug(uid + " : " + stitle + " : TS-VAL size " + str(len(tsvals)))
            """
            log.info(uid + " : " + stitle + " : TS-VAL reading saved in JSON format...")
            with open(JSON_DIR + "reading-" + uid + ".json", 'w') as f:
                f.write(simplejson.dumps(tsvals))
            """
            sensor_data.update({stitle: tsvals})

    return sensor_data
# InfluxDB-backed variant of read_sensor_data. Note that it shares the name
# of the Quasar version above; when both are defined in one module, the
# later definition is the one that takes effect.
def read_sensor_data(sensor_hash, start_time, end_time):
    from log_util import log

    client = InfluxDBClient('ddea-tsdb', 8086, 'ddea', 'ddea', 'ddea')

    sensor_data = dict()
    for stitle, uid in sensor_hash.iteritems():
        tsvals = read_influx_url(client, uid, start_time, end_time)
        if tsvals is None or len(tsvals) == 0:
            log.critical(stitle + " (" + uid + ") is unavailable from "
                         + str(start_time) + " to " + str(end_time))
        else:
            log.debug(uid + " : " + stitle + " : TS-VAL size " + str(len(tsvals)))
            """
            log.info(uid + " : " + stitle + " : TS-VAL reading saved in JSON format...")
            with open(JSON_DIR + "reading-" + uid + ".json", 'w') as f:
                f.write(simplejson.dumps(tsvals))
            """
            sensor_data.update({stitle: tsvals})

    return sensor_data
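# read_influx_url() is not defined in this module. The sketch below is one
# plausible shape for it (an assumption, not the project's actual code),
# written against the influxdb-python 1.x client used above, where
# InfluxDBClient.query() returns a ResultSet exposing get_points(). The
# measurement naming and the time filter are guesses.
def _read_influx_url_sketch(client, uid, start_time, end_time):
    q = ("SELECT value FROM \"%s\" WHERE time >= '%s' AND time < '%s'"
         % (uid, start_time.isoformat() + 'Z', end_time.isoformat() + 'Z'))
    result = client.query(q)
    # Flatten the ResultSet into a list of (timestamp, value) pairs
    return [(point['time'], point['value']) for point in result.get_points()]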
def get_weather_timelet(data_dict, t_slots, timelet_inv, use_weather_data_bin=True):
    log.info('------------------------------------')
    log.info('Retrieving weather data...')
    log.info('------------------------------------')

    t_start = t_slots[0]
    t_end = t_slots[-1]
    log.info('start time: ' + str(t_start) + ' ~ end time: ' + str(t_end))

    # Date iteration given start time and end time:
    # iterate over each day for all weather data types.
    for date_idx, date in enumerate(daterange(t_start, t_end, inclusive=True)):
        log.info("weather date : " + date.strftime("%Y-%m-%d"))
        temp = date.strftime("%Y,%m,%d").rsplit(',')

        if use_weather_data_bin:
            filename = WEATHER_DIR + "%04d_%02d_%02d.bin" % (int(temp[0]), int(temp[1]), int(temp[2]))
            data_day = mt.loadObjectBinaryFast(filename)
        else:
            data_day = rw.retrieve_data('SDH', int(temp[0]), int(temp[1]), int(temp[2]), view='d')

        # split the day's data into rows
        data_day = data_day.split('\n')

        # Iterate over each time index (h_idx) of a day for all weather data types
        for h_idx, hour_sample in enumerate(data_day):
            hour_samples = hour_sample.split(',')

            # Initialize the weather data lists of the dictionary.
            # The first row is always the list of weather data types.
            if (h_idx == 0) and (date_idx == 0):
                sensor_name_list = hour_sample.split(',')
                sensor_name_list = [sensor_name.replace('/', '-')
                                    for sensor_name in sensor_name_list]
                for sample_idx, each_sample in enumerate(hour_samples):
                    sensor_name = sensor_name_list[sample_idx]
                    # Create the lists of lists for the minute index
                    sensor_read = [[] for i in range(len(t_slots))]
                    stime_read = [[] for i in range(len(t_slots))]
                    utc_t = []
                    val = []
                    #data_dict.update({sensor_name: sensor_read})
                    #data_dict.update({sensor_name: zip(mtime_read, sensor_read)})
                    data_dict.update({sensor_name: [stime_read, sensor_read, [utc_t, val]]})
            elif h_idx > 0:
                # 'DateUTC' is the timestamp column
                sample_DateUTC = hour_samples[sensor_name_list.index('DateUTC')]

                # convert UTC time to VTT local time
                utc_dt = dt.datetime.strptime(sample_DateUTC, "%Y-%m-%d %H:%M:%S")
                vtt_dt_aware = utc_dt.replace(tzinfo=from_zone).astimezone(to_zone)
                # convert offset-aware to offset-naive datetime
                vtt_dt = dt.datetime(*(vtt_dt_aware.timetuple()[:6]))
                ### WARNING: vtt_utc is not utc
                #log.warn("vtt_utc is not utc")
                vtt_utc = dtime_to_unix([vtt_dt])

                # Boundary condition: skip weather samples outside the analysis range
                if (int((vtt_dt - t_slots[0]).total_seconds()) < 0 or
                        int((vtt_dt - t_slots[-1]).total_seconds()) >= timelet_inv.seconds):
                    log.debug('skipping weather data out of analysis range...')
                    continue

                # time slot index of the given weather sample time
                slot_idx = int((vtt_dt - t_slots[0]).total_seconds() / timelet_inv.seconds)
                cur_sec_val = (vtt_dt - t_slots[slot_idx]).total_seconds()
                if cur_sec_val >= timelet_inv.seconds:
                    log.critical('sec: ' + str(cur_sec_val))
                    raise NameError('Seconds from an hour idx cannot be greater than '
                                    + str(timelet_inv.seconds) + ' secs')

                try:
                    for sample_idx, each_sample in enumerate(hour_samples):
                        # convert string to float where possible
                        try:
                            each_sample = float(each_sample)
                        except ValueError:
                            pass
                        sensor_name = sensor_name_list[sample_idx]
                        if sensor_name in data_dict:
                            if each_sample != 'N/A' and each_sample != []:
                                #data_dict[sensor_name][vtt_dt_idx].append(each_sample)
                                data_dict[sensor_name][0][slot_idx].append(cur_sec_val)
                                data_dict[sensor_name][1][slot_idx].append(each_sample)
                                data_dict[sensor_name][2][0].append(vtt_utc)
                                data_dict[sensor_name][2][1].append(each_sample)
                        else:
                            raise NameError('Inconsistency in the list of weather data')
                except ValueError:
                    slot_idx = -1
            else:
                # h_idx == 0 on later days: the row repeats the list of
                # weather field names, so discard it.
                pass

    return sensor_name_list
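# Hedged standalone usage sketch: construct_data_dict() below is the normal
# caller and passes its freshly built time_slots. The 15-minute slot
# spacing and one-day range here are hypothetical.
def _demo_get_weather_timelet():
    inv = dt.timedelta(minutes=15)
    t_slots = [dt.datetime(2014, 1, 1) + i * inv for i in range(96)]
    weather_dict = dict()
    weather_names = get_weather_timelet(weather_dict, t_slots, inv)
    log.info('weather fields: ' + str(weather_names))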
def construct_data_dict(sensor_data, ans_start_t, ans_end_t, timelet_inv,
                        include_weather=1, PARALLEL=False):
    log.info('-' * 80)
    log.info('mapping sensor list into hashing table using dictionary')
    log.info('Align sensor data onto a single time_slots reference... from '
             + str(ans_start_t) + ' to ' + str(ans_end_t))
    log.info('-' * 80)

    # Variable declaration and initialization
    time_slots = list()
    start = ans_start_t
    while start < ans_end_t:
        time_slots.append(start)
        start = start + timelet_inv

    # Data dictionary:
    # all sensor and weather data is processed and structured into
    # a consistent single data format -- a dictionary
    data_dict = dict()
    sensor_list = list()
    purge_list = list()

    # Data access follows
    # data_dict[key][time_slot_idx][(min_idx=0 or values=1)]
    if PARALLEL:
        log.info("construct_data_dict >>> Parallel enabled")
        args = [(sensor_uuid, sensor_reading, time_slots,
                 ans_start_t, ans_end_t, timelet_inv)
                for sensor_uuid, sensor_reading in sensor_data.iteritems()]

        p = Pool(CPU_CORE_NUM)
        timed_vlist = p.map(pp_construct_data_dict, args)
        p.close()
        p.join()

        for sensor_uuid, timed_value in timed_vlist:
            if len(timed_value):
                sensor_list.append(sensor_uuid)
                data_dict.update({sensor_uuid: timed_value})
            else:
                purge_list.append(sensor_uuid)
    else:
        for sensor_uuid, sensor_reading in sensor_data.iteritems():
            log.info('sampling sensor uuid ' + sensor_uuid)
            len_time_slots = len(time_slots)

            # sensor value is read by time
            dict_sensor_val, dict_stime, utc_t, val = \
                get_val_timelet(sensor_reading, time_slots,
                                ans_start_t, ans_end_t, timelet_inv)

            if dict_sensor_val == -1:
                log.debug('append purge list: dict_sensor_val=-1 ' + sensor_uuid)
                purge_list.append(sensor_uuid)
            elif len(utc_t) < len_time_slots:
                log.debug('append purge list: len(utc_t) < len_time_slots ' + sensor_uuid)
                purge_list.append(sensor_uuid)
            elif len(val) < len_time_slots:
                log.debug('append purge list: len(val) < len_time_slots ' + sensor_uuid)
                purge_list.append(sensor_uuid)
            else:
                sensor_list.append(sensor_uuid)
                # Convert lists to arrays to reduce bin file size and loading time
                dict_sensor_val_temp = np.array([np.asarray(val_) for val_ in dict_sensor_val])
                dict_stime_temp = np.array([np.asarray(t_) for t_ in dict_stime])
                utc_t_val_temp = np.asarray([utc_t, val])
                data_dict.update({sensor_uuid:
                                  [dict_stime_temp, dict_sensor_val_temp, utc_t_val_temp]})
            log.info('-' * 20)

    data_dict.update({'time_slots': time_slots})
    log.info('-' * 40)

    if include_weather == 1:
        # directly access the internet
        log.info("Constructing weather dict")
        # weather_list is pretty much fixed by the database;
        # (*) marks the data used for our analysis
        # 0  TimeEEST
        # 1  TemperatureC (*)
        # 2  Dew PointC (*)
        # 3  Humidity (*)
        # 4  Sea Level PressurehPa
        # 5  VisibilityKm
        # 6  Wind Direction
        # 7  Wind SpeedKm/h
        # 8  Gust SpeedKm/h
        # 9  Precipitationmm
        # 10 Events (*)
        # 11 Conditions (*)
        # 12 WindDirDegrees
        # 13 DateUTC
        weather_list = get_weather_timelet(data_dict, time_slots, timelet_inv)

        # Convert symbols to integer representation
        data_dict['Conditions'][1], Conditions_dict = symbol_to_state(data_dict['Conditions'][1])
        data_dict['Events'][1], Events_dict = symbol_to_state(data_dict['Events'][1])

        data_dict.update({'sensor_list': sensor_list})
        data_dict.update({'weather_list': weather_list})
        data_dict.update({'Conditions_dict': Conditions_dict})
        data_dict.update({'Events_dict': Events_dict})

        # Change list to array type
        for key_id in weather_list:
            temp_list = list()
            for k, list_val_ in enumerate(data_dict[key_id]):
                temp_list.append(np.asanyarray(list_val_))
            data_dict[key_id] = temp_list
    elif include_weather == 2:
        # use stored bin file
        log.info('use weather_dict.bin')
        # This part is to be filled in by Khiem...
    else:
        log.info('skip weather database...')

    return data_dict, purge_list
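# End-to-end usage sketch (assumption, not part of the original pipeline):
# read raw readings for a hash of {title: uuid} pairs, then align them on a
# common slot grid. The sensor hash, date range, and 15-minute interval
# below are hypothetical.
def _demo_construct_data_dict():
    sensor_hash = {'VAK1.HA11': 'uuid-1234'}  # hypothetical mapping
    start_t = dt.datetime(2014, 1, 1)
    end_t = dt.datetime(2014, 1, 8)
    sensor_data = read_sensor_data(sensor_hash, start_t, end_t)
    data_dict, purge_list = construct_data_dict(
        sensor_data, start_t, end_t, dt.timedelta(minutes=15),
        include_weather=0, PARALLEL=False)
    # data_dict holds one entry per kept sensor plus the 'time_slots' key
    log.info('aligned %d sensors, purged %d'
             % (len(data_dict) - 1, len(purge_list)))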