Example #1
def pp_construct_data_dict(args):
    (sensor_uuid, sensor_reading, time_slots, ans_start_t, ans_end_t, timelet_inv) = args
    log.info('sampling sensor uuid ' + sensor_uuid)
    log.info('-' * 20)

    len_time_slots = len(time_slots)
    ret = None

    # sensor values are read per time slot
    dict_sensor_val, dict_stime, utc_t, val =\
        get_val_timelet(sensor_reading, time_slots, ans_start_t, ans_end_t, timelet_inv)

    if dict_sensor_val == -1:
        log.debug('append purge list: dict_sensor_val=-1 ' + sensor_uuid)
        # return an empty list to signal that this uuid has to be purged
        ret = (sensor_uuid, [])

    elif len(utc_t) < len_time_slots:
        log.debug('append purge list: len(utc_t)<len_time_slots ' + sensor_uuid)
        ret = (sensor_uuid, [])

    elif len(val) < len_time_slots:
        log.debug('append purge list: len(val)<len_time_slots ' + sensor_uuid)
        ret = (sensor_uuid, [])

    else:
        # Convert lists to arrays only after validation: iterating over
        # dict_sensor_val would raise a TypeError when it is the -1 sentinel.
        dict_sensor_val_temp = np.array([np.asarray(v) for v in dict_sensor_val])
        dict_stime_temp = np.array([np.asarray(t) for t in dict_stime])
        utc_t_val_temp = np.asarray([utc_t, val])

        ret = (sensor_uuid, [dict_stime_temp, dict_sensor_val_temp, utc_t_val_temp])

    return ret
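Both this example and Example #4 below depend on get_val_timelet, which is not among the snippets. The stand-in below is entirely hypothetical, written only from the call site and the -1 sentinel checked above, but it is enough to exercise pp_construct_data_dict in isolation:

# HYPOTHETICAL stand-in for the real get_val_timelet, for testing only.
# It bins (timestamp, value) pairs into time_slots and returns the same
# -1 sentinel that pp_construct_data_dict checks for on empty input.
def get_val_timelet(sensor_reading, time_slots, ans_start_t, ans_end_t, timelet_inv):
    if not sensor_reading:
        return -1, -1, [], []
    dict_sensor_val = [[] for _ in time_slots]
    dict_stime = [[] for _ in time_slots]
    utc_t, val = [], []
    for ts, v in sensor_reading:  # assumed (datetime, float) pairs
        if not (ans_start_t <= ts < ans_end_t):
            continue
        slot_idx = int((ts - time_slots[0]).total_seconds() / timelet_inv.seconds)
        dict_stime[slot_idx].append((ts - time_slots[slot_idx]).total_seconds())
        dict_sensor_val[slot_idx].append(v)
        utc_t.append(ts)
        val.append(v)
    return dict_sensor_val, dict_stime, utc_t, val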
Example #2
def read_sensor_data(sensor_hash, start_time, end_time):

    from log_util import log

    sensor_data = dict()
    for stitle, uid in sensor_hash.iteritems():
        tsvals = read_quasar_url(uid, start_time, end_time)

        if tsvals is None or len(tsvals) == 0:
            log.critical(stitle + " (" + uid + ") is unavailable from " + str(start_time) + " to " + str(end_time))
        else:
            log.debug(uid + " : " + stitle + " : TS-VAL size " + str(len(tsvals)))

            """
            log.info(uid + " : " + stitle + " : TS-VAL reading saved in JSON format...")
            with open(JSON_DIR + "reading-" + uid + ".json", 'w') as f:
                f.write(simplejson.dumps(tsvals))
            """

            sensor_data.update({stitle: tsvals})

    return sensor_data
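A minimal caller sketch for read_sensor_data, assuming read_quasar_url is importable as above; the titles and UUIDs below are placeholders, not real identifiers:

import datetime as dt

# Placeholder title -> uuid mapping; real UUIDs come from the deployment.
sensor_hash = {'room-101-temp': 'uuid-temp-001',
               'room-101-co2': 'uuid-co2-001'}

start_time = dt.datetime(2014, 1, 1)
end_time = dt.datetime(2014, 1, 8)

sensor_data = read_sensor_data(sensor_hash, start_time, end_time)
# Sensors with no readings in the window are logged at critical level
# and simply absent from the returned dict.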
Example #3
def read_sensor_data(sensor_hash, start_time, end_time):

    from log_util import log
    client = InfluxDBClient('ddea-tsdb', 8086, 'ddea', 'ddea', 'ddea')
    sensor_data = dict()
    for stitle, uid in sensor_hash.iteritems():
        tsvals = read_influx_url(client, uid, start_time, end_time)

        if tsvals is None or len(tsvals) == 0:
            log.critical(stitle + " (" + uid + ") is unavailable from " + str(start_time) + " to " + str(end_time))
        else:
            log.debug(uid + " : " + stitle + " : TS-VAL size " + str(len(tsvals)))

            """
            log.info(uid + " : " + stitle + " : TS-VAL reading saved in JSON format...")
            with open(JSON_DIR + "reading-" + uid + ".json", 'w') as f:
                f.write(simplejson.dumps(tsvals))
            """

            sensor_data.update({stitle: tsvals})

    return sensor_data
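This variant swaps the Quasar reader for InfluxDB; the positional InfluxDBClient arguments are host, port, username, password, and database. read_influx_url is not shown, so the sketch below is a guess built on the influxdb client's query()/get_points() API; the measurement name, tag key, and field name are assumptions:

from influxdb import InfluxDBClient

# HYPOTHETICAL reader: the 'readings' measurement, 'uuid' tag and 'value'
# field are assumptions, not taken from the examples.
def read_influx_url(client, uid, start_time, end_time):
    q = ("SELECT value FROM readings "
         "WHERE uuid = '%s' AND time >= '%s' AND time < '%s'"
         % (uid, start_time.isoformat() + 'Z', end_time.isoformat() + 'Z'))
    result = client.query(q)
    # get_points() yields dicts with an implicit 'time' key per point
    return [(p['time'], p['value']) for p in result.get_points()]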
Example #4
def pp_construct_data_dict(args):
    (sensor_uuid, sensor_reading, time_slots, ans_start_t, ans_end_t,
     timelet_inv) = args
    log.info('sampling sensor uuid ' + sensor_uuid)
    log.info('-' * 20)

    len_time_slots = len(time_slots)
    ret = None

    # sensor values are read per time slot
    dict_sensor_val, dict_stime, utc_t, val =\
        get_val_timelet(sensor_reading, time_slots, ans_start_t, ans_end_t, timelet_inv)

    if dict_sensor_val == -1:
        log.debug('append purge list: dict_sensor_val=-1 ' + sensor_uuid)
        # return an empty list to signal that this uuid has to be purged
        ret = (sensor_uuid, [])

    elif len(utc_t) < len_time_slots:
        log.debug('append purge list: len(utc_t)<len_time_slots ' + sensor_uuid)
        ret = (sensor_uuid, [])

    elif len(val) < len_time_slots:
        log.debug('append purge list: len(val)<len_time_slots ' + sensor_uuid)
        ret = (sensor_uuid, [])

    else:
        # Convert lists to arrays only after validation: iterating over
        # dict_sensor_val would raise a TypeError when it is the -1 sentinel.
        dict_sensor_val_temp = np.array([np.asarray(v) for v in dict_sensor_val])
        dict_stime_temp = np.array([np.asarray(t) for t in dict_stime])
        utc_t_val_temp = np.asarray([utc_t, val])

        ret = (sensor_uuid,
               [dict_stime_temp, dict_sensor_val_temp, utc_t_val_temp])

    return ret
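One detail shared by both variants: the per-slot lists are typically ragged (slots hold different numbers of samples), so wrapping them with np.array yields an object-dtype array rather than a 2-D numeric one. Older NumPy did this silently, which is what the code above relies on; NumPy 1.24+ requires an explicit dtype=object. A quick illustration:

import numpy as np

ragged = [[1.0, 2.0], [3.0], []]   # samples per slot vary
arr = np.array([np.asarray(r) for r in ragged], dtype=object)
print(arr.dtype)   # object
print(arr[0])      # array([1., 2.])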
Example #5
def get_weather_timelet(data_dict, t_slots, timelet_inv, use_weather_data_bin=True):

    log.info('------------------------------------')
    log.info('Retrieving weather data... ')
    log.info('------------------------------------')
    t_start = t_slots[0]
    t_end = t_slots[-1]
    log.info('start time: ' + str(t_start) + ' ~ end time: ' + str(t_end))

    # Date iteration given start time and end time
    # Iterate over each day for all weather data types
    for date_idx, date in enumerate(daterange(t_start, t_end, inclusive=True)):
        log.info("weather date : " + date.strftime("%Y-%m-%d"))

        temp = date.strftime("%Y,%m,%d").rsplit(',')

        if use_weather_data_bin:
            filename = WEATHER_DIR + "%04d_%02d_%02d.bin"%(int(temp[0]), int(temp[1]), int(temp[2]))
            data_day = mt.loadObjectBinaryFast(filename)
        else:
            data_day = rw.retrieve_data('SDH', int(temp[0]), int(temp[1]), int(temp[2]), view='d')

        # split the day's data into lines (one row per hourly sample)
        data_day = data_day.split('\n')

        # Iterate over each time index (h_idx) of a day for all weather data types
        for h_idx, hour_sample in enumerate(data_day):

            hour_samples = hour_sample.split(',')

            # Initialize the dictionary's weather data lists
            # The first row is always the list of weather data types
            if (h_idx == 0) and (date_idx == 0):

                sensor_name_list = hour_sample.split(',')
                sensor_name_list = [sensor_name.replace('/', '-') for sensor_name in sensor_name_list]

                for sample_idx, each_sample in enumerate(hour_samples):
                    sensor_name = sensor_name_list[sample_idx]
                    sensor_read = [[] for i in range(len(t_slots))]
                    stime_read = [[] for i in range(len(t_slots))]  # Create the list of lists for minute index
                    utc_t = []
                    val = []
                    #data_dict.update({sensor_name:sensor_read})
                    #data_dict.update({sensor_name:zip(mtime_read,sensor_read)})
                    data_dict.update({sensor_name: [stime_read, sensor_read, [utc_t, val]]})

            elif h_idx > 0:
                ################################################################
                # 'DateUTC' is the timestamp column
                sample_DateUTC = hour_samples[sensor_name_list.index('DateUTC')]

                # convert UTC time to VTT local time.
                utc_dt = dt.datetime.strptime(sample_DateUTC, "%Y-%m-%d %H:%M:%S")
                vtt_dt_aware = utc_dt.replace(tzinfo=from_zone).astimezone(to_zone)

                # convert to offset-naive from offset-aware datetimes
                vtt_dt = dt.datetime(*(vtt_dt_aware.timetuple()[:6]))

                ### WARNING: vtt_utc is not utc
                #log.warn("vtt_utc is not utc")
                vtt_utc = dtime_to_unix([vtt_dt])

                # Check boundary condition
                if int((vtt_dt - t_slots[0]).total_seconds()) < 0 or int((vtt_dt - t_slots[-1]).total_seconds()) >= timelet_inv.seconds:
                    log.debug('skipping weather data out of analysis range...')
                    continue

                slot_idx = int((vtt_dt - t_slots[0]).total_seconds() / timelet_inv.seconds)
                cur_sec_val = (vtt_dt - t_slots[slot_idx]).total_seconds()

                if cur_sec_val >= timelet_inv.seconds:
                    log.critical('sec: ' + str(cur_sec_val))
                    raise NameError('Seconds from an hour idx cannot be greater than ' + str(timelet_inv.seconds) + ' secs')

                # map the weather sample time to its time slot index
                try:

                    for sample_idx, each_sample in enumerate(hour_samples):

                        # convert string type to float if possible
                        try:
                            each_sample = float(each_sample)
                        except ValueError:
                            pass  # non-numeric fields stay as strings

                        sensor_name = sensor_name_list[sample_idx]

                        if sensor_name in data_dict:
                            if each_sample != 'N/A' and each_sample != []:
                                #data_dict[sensor_name][vtt_dt_idx].append(each_sample)
                                data_dict[sensor_name][0][slot_idx].append(cur_sec_val)
                                data_dict[sensor_name][1][slot_idx].append(each_sample)
                                data_dict[sensor_name][2][0].append(vtt_utc)
                                data_dict[sensor_name][2][1].append(each_sample)

                        else:
                            raise NameError('Inconsistency in the list of weather data')

                except ValueError:
                    # malformed row: invalidate the slot index and move on
                    slot_idx = -1

            # hour_sample is the list of weather field names, discard it
            else:
                pass

    return sensor_name_list
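Each entry this function builds follows the layout [stime_read, sensor_read, [utc_t, val]]. A short access sketch, using 'TemperatureC' (one of the weather fields listed in Example #6) and assuming the entry has not yet been converted to arrays:

# Access pattern for one weather field after get_weather_timelet has run;
# slot 0 is the first timelet of the analysis range.
stime_read, sensor_read, (utc_t, val) = data_dict['TemperatureC']

slot_idx = 0
print(sensor_read[slot_idx])   # temperature samples that fell into the slot
print(stime_read[slot_idx])    # seconds-offset of each sample within the slot
print(len(utc_t) == len(val))  # flat timestamp/value streams stay aligned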
Example #6
def construct_data_dict(sensor_data, ans_start_t, ans_end_t, timelet_inv, include_weather=1, PARALLEL=False):

    log.info('-' * 80)
    log.info('mapping sensor list into a hashing table using a dictionary')
    log.info('Align sensor data into a single time_slots reference... from ' + str(ans_start_t) + ' to ' + str(ans_end_t))
    log.info('-' * 80)

    # Variable declaration and initialization
    time_slots = list()
    start = ans_start_t
    while start < ans_end_t:
        time_slots.append(start)
        start = start + timelet_inv

    # Data dictionary
    # All sensor and weather data is processed and structured into
    # a consistent single data format -- Dictionary
    data_dict = dict()
    sensor_list = list()
    purge_list = list()

    # Data access pattern:
    # data_dict[key][time_slot_idx][(min_idx=0 or values=1)]

    if PARALLEL:

        log.info("construct_data_dict >>> Parallel enabled")
        args = [(sensor_uuid, sensor_reading, time_slots, ans_start_t, ans_end_t, timelet_inv) for sensor_uuid, sensor_reading in sensor_data.iteritems() ]

        p = Pool(CPU_CORE_NUM)
        timed_vlist = p.map(pp_construct_data_dict, args)
        p.close()
        p.join()

        for v in timed_vlist:
            sensor_uuid, timed_value = v

            if len(timed_value):
                sensor_list.append(sensor_uuid)
                data_dict.update({sensor_uuid: timed_value})

            else:
                purge_list.append(sensor_uuid)

    else:

        for sensor_uuid, sensor_reading in sensor_data.iteritems():

            log.info('sampling sensor uuid ' + sensor_uuid)
            len_time_slots = len(time_slots)

            # sensor values are read per time slot
            dict_sensor_val, dict_stime, utc_t, val =\
                get_val_timelet(sensor_reading, time_slots, ans_start_t, ans_end_t, timelet_inv)

            if dict_sensor_val == -1:
                log.debug('append purge list: dict_sensor_val=-1 ' + sensor_uuid)
                purge_list.append(sensor_uuid)

            elif len(utc_t) < len_time_slots:
                log.debug('append purge list: len(utc_t)<len_time_slots ' + sensor_uuid)
                purge_list.append(sensor_uuid)

            elif len(val) < len_time_slots:
                log.debug('append purge list: len(val)<len_time_slots ' + sensor_uuid)
                purge_list.append(sensor_uuid)

            else:
                sensor_list.append(sensor_uuid)

                # Convert lists to arrays to reduce bin file size and loading time
                dict_sensor_val_temp = np.array([np.asarray(val_) for val_ in dict_sensor_val])
                dict_stime_temp = np.array([np.asarray(t_) for t_ in dict_stime])
                utc_t_val_temp = np.asarray([utc_t, val])

                data_dict.update({sensor_uuid: [dict_stime_temp, dict_sensor_val_temp, utc_t_val_temp]})

            log.info('-' * 20)

    data_dict.update({'time_slots': time_slots})
    log.info('-' * 40)

    # directly access the internet
    if include_weather == 1:
        log.info("Constructing weather dict")
        # weather_list is essentially fixed by the database;
        # (*) marks the data to be used for our analysis
        #0 TimeEEST
        #1 TemperatureC (*)
        #2 Dew PointC (*)
        #3 Humidity (*)
        #4 Sea Level PressurehPa
        #5 VisibilityKm
        #6 Wind Direction
        #7 Wind SpeedKm/h
        #8 Gust SpeedKm/h
        #9 Precipitationmm
        #10 Events (*)
        #11 Conditions (*)
        #12 WindDirDegrees
        #13 DateUTC

        weather_list = get_weather_timelet(data_dict, time_slots, timelet_inv)
        # Convert symbols to integer representation

        data_dict['Conditions'][1], Conditions_dict = symbol_to_state(data_dict['Conditions'][1])
        data_dict['Events'][1], Events_dict = symbol_to_state(data_dict['Events'][1])
        data_dict.update({'sensor_list': sensor_list})
        data_dict.update({'weather_list' : weather_list})
        data_dict.update({'Conditions_dict': Conditions_dict})
        data_dict.update({'Events_dict' : Events_dict})

        # Change List to Array type
        for key_id in weather_list:
            temp_list = list()
            for k, list_val_ in enumerate(data_dict[key_id]):
                temp_list.append(np.asanyarray(list_val_))

            data_dict[key_id] = temp_list

    # use stored bin file
    elif include_weather == 2:
        log.info('use weather_dict.bin')
        # This part is to be filled in by Khiem...

    else:
        log.info('skip weather database...')

    return data_dict, purge_list
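Putting the pieces together, a minimal end-to-end driver might look like the sketch below; the date range and fifteen-minute interval are arbitrary, sensor_hash is the placeholder mapping from the Example #2 sketch, and log, read_sensor_data, and construct_data_dict are assumed importable as in the examples:

import datetime as dt

start_time = dt.datetime(2014, 1, 1)
end_time = dt.datetime(2014, 1, 8)
timelet_inv = dt.timedelta(minutes=15)   # arbitrary slot width

sensor_data = read_sensor_data(sensor_hash, start_time, end_time)
data_dict, purge_list = construct_data_dict(sensor_data,
                                            start_time,
                                            end_time,
                                            timelet_inv,
                                            include_weather=1,
                                            PARALLEL=True)

log.info('purged %d sensors with insufficient data' % len(purge_list))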
Example #7
def get_weather_timelet(data_dict,
                        t_slots,
                        timelet_inv,
                        use_weather_data_bin=True):

    log.info('------------------------------------')
    log.info('Retrieving weather data... ')
    log.info('------------------------------------')
    t_start = t_slots[0]
    t_end = t_slots[-1]
    log.info('start time: ' + str(t_start) + ' ~ end time: ' + str(t_end))

    # Date iteration given start time and end time
    # Iterate over each day for all weather data types
    for date_idx, date in enumerate(daterange(t_start, t_end, inclusive=True)):
        log.info("weather date : " + date.strftime("%Y-%m-%d"))

        temp = date.strftime("%Y,%m,%d").rsplit(',')

        if use_weather_data_bin:
            filename = WEATHER_DIR + "%04d_%02d_%02d.bin" % (int(
                temp[0]), int(temp[1]), int(temp[2]))
            data_day = mt.loadObjectBinaryFast(filename)
        else:
            data_day = rw.retrieve_data('SDH',
                                        int(temp[0]),
                                        int(temp[1]),
                                        int(temp[2]),
                                        view='d')

        # split the day's data into lines (one row per hourly sample)
        data_day = data_day.split('\n')

        # Iterate over each time index (h_idx) of a day for all weather data types
        for h_idx, hour_sample in enumerate(data_day):

            hour_samples = hour_sample.split(',')

            # Initialize the dictionary's weather data lists
            # The first row is always the list of weather data types
            if (h_idx == 0) and (date_idx == 0):

                sensor_name_list = hour_sample.split(',')
                sensor_name_list = [
                    sensor_name.replace('/', '-')
                    for sensor_name in sensor_name_list
                ]

                for sample_idx, each_sample in enumerate(hour_samples):
                    sensor_name = sensor_name_list[sample_idx]
                    sensor_read = [[] for i in range(len(t_slots))]
                    stime_read = [[] for i in range(len(t_slots))
                                  ]  # Create the list of lists for minute index
                    utc_t = []
                    val = []
                    #data_dict.update({sensor_name:sensor_read})
                    #data_dict.update({sensor_name:zip(mtime_read,sensor_read)})
                    data_dict.update(
                        {sensor_name: [stime_read, sensor_read, [utc_t, val]]})

            elif h_idx > 0:
                ################################################################
                # 'DateUTC' is the timestamp column
                sample_DateUTC = hour_samples[sensor_name_list.index(
                    'DateUTC')]

                # convert UTC time to VTT local time.
                utc_dt = dt.datetime.strptime(sample_DateUTC,
                                              "%Y-%m-%d %H:%M:%S")
                vtt_dt_aware = utc_dt.replace(
                    tzinfo=from_zone).astimezone(to_zone)

                # convert to offset-naive from offset-aware datetimes
                vtt_dt = dt.datetime(*(vtt_dt_aware.timetuple()[:6]))

                ### WARNING: vtt_utc is not utc
                #log.warn("vtt_utc is not utc")
                vtt_utc = dtime_to_unix([vtt_dt])

                # Check boundary condition
                if int((vtt_dt - t_slots[0]).total_seconds()) < 0 or int(
                    (vtt_dt -
                     t_slots[-1]).total_seconds()) >= timelet_inv.seconds:
                    log.debug('skipping weather data out of analysis range...')
                    continue

                slot_idx = int((vtt_dt - t_slots[0]).total_seconds() /
                               timelet_inv.seconds)
                cur_sec_val = (vtt_dt - t_slots[slot_idx]).total_seconds()

                if cur_sec_val >= timelet_inv.seconds:
                    log.critical('sec: ' + str(cur_sec_val))
                    raise NameError(
                        'Seconds from an hour idx cannot be greater than ' +
                        str(timelet_inv.seconds) + ' secs')

                # map the weather sample time to its time slot index
                try:

                    for sample_idx, each_sample in enumerate(hour_samples):

                        # convert string type to float if possible
                        try:
                            each_sample = float(each_sample)
                        except ValueError:
                            pass  # non-numeric fields stay as strings

                        sensor_name = sensor_name_list[sample_idx]

                        if sensor_name in data_dict:
                            if each_sample != 'N/A' and each_sample != []:
                                #data_dict[sensor_name][vtt_dt_idx].append(each_sample)
                                data_dict[sensor_name][0][slot_idx].append(
                                    cur_sec_val)
                                data_dict[sensor_name][1][slot_idx].append(
                                    each_sample)
                                data_dict[sensor_name][2][0].append(vtt_utc)
                                data_dict[sensor_name][2][1].append(
                                    each_sample)

                        else:
                            raise NameError(
                                'Inconsistency in the list of weather data')

                except ValueError:
                    # malformed row: invalidate the slot index and move on
                    slot_idx = -1

            # hour_sample is the list of weather field names, discard it
            else:
                pass

    return sensor_name_list
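get_weather_timelet also leans on two helpers not reproduced here, daterange and dtime_to_unix. The stand-ins below are hypothetical, written only to be consistent with the call sites above: an inclusive day iterator, and a list-in/list-out datetime-to-epoch converter:

import calendar
import datetime as dt

# HYPOTHETICAL day iterator, inferred from daterange(t_start, t_end, inclusive=True)
def daterange(t_start, t_end, inclusive=False):
    day = dt.datetime(t_start.year, t_start.month, t_start.day)
    last = dt.datetime(t_end.year, t_end.month, t_end.day)
    while day < last or (inclusive and day == last):
        yield day
        day += dt.timedelta(days=1)

# HYPOTHETICAL list-in/list-out epoch converter; treats naive datetimes as
# UTC, although the caller's own warning notes the input is local time
def dtime_to_unix(dt_list):
    return [calendar.timegm(d.timetuple()) for d in dt_list]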
Example #8
def construct_data_dict(sensor_data,
                        ans_start_t,
                        ans_end_t,
                        timelet_inv,
                        include_weather=1,
                        PARALLEL=False):

    log.info('-' * 80)
    log.info('mapping sensor list into a hashing table using a dictionary')
    log.info('Align sensor data into a single time_slots reference... from ' +
             str(ans_start_t) + ' to ' + str(ans_end_t))
    log.info('-' * 80)

    # Variable declaration and initialization
    time_slots = list()
    start = ans_start_t
    while start < ans_end_t:
        time_slots.append(start)
        start = start + timelet_inv

    # Data dictionary
    # All sensor and weather data is processed and structured into
    # a consistent single data format -- Dictionary
    data_dict = dict()
    sensor_list = list()
    purge_list = list()

    # Data access pattern:
    # data_dict[key][time_slot_idx][(min_idx=0 or values=1)]

    if PARALLEL:

        log.info("construct_data_dict >>> Parallel enabled")
        args = [(sensor_uuid, sensor_reading, time_slots, ans_start_t,
                 ans_end_t, timelet_inv)
                for sensor_uuid, sensor_reading in sensor_data.iteritems()]

        p = Pool(CPU_CORE_NUM)
        timed_vlist = p.map(pp_construct_data_dict, args)
        p.close()
        p.join()

        for v in timed_vlist:
            sensor_uuid, timed_value = v

            if len(timed_value):
                sensor_list.append(sensor_uuid)
                data_dict.update({sensor_uuid: timed_value})

            else:
                purge_list.append(sensor_uuid)

    else:

        for sensor_uuid, sensor_reading in sensor_data.iteritems():

            log.info('sampling sensor uuid ' + sensor_uuid)
            len_time_slots = len(time_slots)

            # sensor values are read per time slot
            dict_sensor_val, dict_stime, utc_t, val =\
                get_val_timelet(sensor_reading, time_slots, ans_start_t, ans_end_t, timelet_inv)

            if dict_sensor_val == -1:
                log.debug('append purge list: dict_sensor_val=-1 ' +
                          sensor_uuid)
                purge_list.append(sensor_uuid)

            elif len(utc_t) < len_time_slots:
                log.debug('append purge list: len(utc_t)<len_time_slots ' +
                          sensor_uuid)
                purge_list.append(sensor_uuid)

            elif len(val) < len_time_slots:
                log.debug('append purge list: len(val)<len_time_slots ' +
                          sensor_uuid)
                purge_list.append(sensor_uuid)

            else:
                sensor_list.append(sensor_uuid)

                # Convert lists to arrays to reduce bin file size and loading time
                dict_sensor_val_temp = np.array(
                    [np.asarray(val_) for val_ in dict_sensor_val])
                dict_stime_temp = np.array(
                    [np.asarray(t_) for t_ in dict_stime])
                utc_t_val_temp = np.asarray([utc_t, val])

                data_dict.update({
                    sensor_uuid:
                    [dict_stime_temp, dict_sensor_val_temp, utc_t_val_temp]
                })

            log.info('-' * 20)

    data_dict.update({'time_slots': time_slots})
    log.info('-' * 40)

    # directly access the internet
    if include_weather == 1:
        log.info("Constructing weather dict")
        # weather_list is essentially fixed by the database;
        # (*) marks the data to be used for our analysis
        #0 TimeEEST
        #1 TemperatureC (*)
        #2 Dew PointC (*)
        #3 Humidity (*)
        #4 Sea Level PressurehPa
        #5 VisibilityKm
        #6 Wind Direction
        #7 Wind SpeedKm/h
        #8 Gust SpeedKm/h
        #9 Precipitationmm
        #10 Events (*)
        #11 Conditions (*)
        #12 WindDirDegrees
        #13 DateUTC

        weather_list = get_weather_timelet(data_dict, time_slots, timelet_inv)
        # Convert symbols to integer representation

        data_dict['Conditions'][1], Conditions_dict = symbol_to_state(
            data_dict['Conditions'][1])
        data_dict['Events'][1], Events_dict = symbol_to_state(
            data_dict['Events'][1])
        data_dict.update({'sensor_list': sensor_list})
        data_dict.update({'weather_list': weather_list})
        data_dict.update({'Conditions_dict': Conditions_dict})
        data_dict.update({'Events_dict': Events_dict})

        # Change List to Array type
        for key_id in weather_list:
            temp_list = list()
            for k, list_val_ in enumerate(data_dict[key_id]):
                temp_list.append(np.asanyarray(list_val_))

            data_dict[key_id] = temp_list

    # use stored bin file
    elif include_weather == 2:
        log.info('use weather_dict.bin')
        # This part is to be filled in by Khiem...

    else:
        log.info('skip weather database...')

    return data_dict, purge_list
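Finally, symbol_to_state, used above to recode the 'Conditions' and 'Events' strings as integers, is not among the examples. A plausible sketch, assuming it assigns each distinct symbol a small integer and returns the mapping alongside the recoded nested lists:

# HYPOTHETICAL sketch of symbol_to_state, inferred from its call sites:
# recode nested lists of symbols as small integers and return the
# symbol -> state mapping for later decoding.
def symbol_to_state(slot_lists):
    symbol_dict = {}
    recoded = []
    for slot in slot_lists:
        new_slot = []
        for symbol in slot:
            if symbol not in symbol_dict:
                symbol_dict[symbol] = len(symbol_dict)
            new_slot.append(symbol_dict[symbol])
        recoded.append(new_slot)
    return recoded, symbol_dict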