Python DataPoint Examples, cerebralcortex.core.datatypes.datastream.DataPoint Python Examples

Example #1

0

Show file

File: phone_screen_touch_features_all_app.py Project: msh-shiplu/CerebralCortex-DataAnalysis

    def label_appusage_intervals(self, data: List[DataPoint], intervals: List,
                                 interval_label: List[str]) -> List[DataPoint]:
        """
        Label the gap between consecutive screen touches and merge neighbouring gaps with the same label.

        :param List(DataPoint) data: Phone touch screen data stream
        :param List intervals: ``intervals[j]`` holds the inclusive lower and upper bound that belongs to
                               ``interval_label[j]``; the bounds are compared against the gap between two
                               consecutive touches as returned by ``total_seconds()``
        :param List(str) interval_label: Label assigned to a gap that falls into the interval with the same index
        :return: Labelled touch intervals; a gap that matches no interval produces no entry
        :rtype: List(DataPoint)
        """
        labelled = []

        for idx in range(1, len(data)):
            previous_touch = data[idx - 1].start_time
            current_touch = data[idx].start_time
            gap = (current_touch - previous_touch).total_seconds()

            # The first interval that contains the gap wins.
            for pos, label in enumerate(interval_label):
                if not (intervals[pos][0] <= gap <= intervals[pos][1]):
                    continue

                extends_tail = (len(labelled) > 0
                                and labelled[-1].end_time == previous_touch
                                and labelled[-1].sample == label)
                if extends_tail:
                    # Same label and contiguous in time: stretch the last entry instead of adding a new one.
                    tail = labelled[-1]
                    labelled[-1] = DataPoint(start_time=tail.start_time,
                                             end_time=current_touch,
                                             offset=tail.offset,
                                             sample=tail.sample)
                else:
                    labelled.append(DataPoint(start_time=previous_touch,
                                              end_time=current_touch,
                                              offset=data[idx].offset,
                                              sample=label))
                break

        return labelled

Example #2

0

Show file

File: gpsfeature.py Project: amimithun/CerebralCortex-DataAnalysis

    def transition_counter(self, semanticdata: object) -> object:
        """
        Count transitions from one type of place to another.

        Entries whose sample is "transit" are dropped first. A transition is then counted whenever two
        consecutive remaining entries carry different samples.

        :param semanticdata: DataPoint array of semantic stream
        :return: Three DataPoints spanning ``semanticdata[0].start_time`` to ``semanticdata[-1].end_time``:
                 a dictionary mapping "<from> <to>" to the number of such transitions, the number of
                 transitions into "work", and the number of transitions into "home" (the last two are
                 matched case-insensitively). An empty list is returned for empty input.
        :rtype: List(DataPoint)
        """
        # Without any input there is no time span to report on; mirror the other
        # feature methods and return an empty list instead of failing on [0].
        if len(semanticdata) == 0:
            return []

        stops = [dp for dp in semanticdata if str(dp.sample) != "transit"]

        number_of_trans_dict = {}
        to_work_transitions = 0
        to_home_transitions = 0

        for pre_loc, post_loc in zip(stops, stops[1:]):
            if pre_loc.sample != post_loc.sample:
                key_string = pre_loc.sample + " " + post_loc.sample
                number_of_trans_dict[key_string] = number_of_trans_dict.get(key_string, 0) + 1

                destination = post_loc.sample.lower()
                if destination == 'work':
                    to_work_transitions += 1
                if destination == 'home':
                    to_home_transitions += 1

        # The reported span covers the whole input, including transit entries.
        start_time = semanticdata[0].start_time
        end_time = semanticdata[-1].end_time
        offset = semanticdata[0].offset

        return [
            DataPoint(start_time, end_time, offset, number_of_trans_dict),
            DataPoint(start_time, end_time, offset, to_work_transitions),
            DataPoint(start_time, end_time, offset, to_home_transitions),
        ]

Example #3

0

Show file

def json_to_datapoints(json_obj):
    """
    Build a DataPoint from a decoded JSON object.

    :param json_obj: mapping with a "value" key, a "starttime" key and optionally an "endtime" key
    :return: DataPoint whose sample is the value itself when it is a string, otherwise its JSON encoding
    """
    value = json_obj["value"]
    sample = value if isinstance(value, str) else json.dumps(value)
    start_time = parse(json_obj["starttime"])

    # The original marked this branch as test code that is not expected to run.
    if "endtime" in json_obj:
        end_time = json_obj["endtime"]
        # Parse textual end times the same way as the start time so both ends
        # of the DataPoint have the same type; other values pass through as is.
        if isinstance(end_time, str):
            end_time = parse(end_time)
        return DataPoint(start_time=start_time, end_time=end_time, sample=sample)

    return DataPoint(start_time=start_time, sample=sample)

Example #4

0

Show file

File: gps_daily.py Project: amimithun/CerebralCortex-DataAnalysis

    def split_datapoint_array_by_day(self, data: object) -> object:
        """
        Split DataPoints that span several calendar days into one DataPoint per day.

        Days are determined from the ``date()`` of each DataPoint's own start and end time. Segment
        boundaries are placed at midnight and carry the tzinfo of the DataPoint's start time.

        :param data: Iterable of DataPoints
        :return: DataPoints that start and end on the same day are passed through unchanged; every other
                 DataPoint is replaced by contiguous per-day segments with the same offset and sample
        :rtype: List(DataPoint)
        """
        data_by_day = []
        for dp in data:
            start_time = dp.start_time
            end_time = dp.end_time
            end_date = end_time.date()
            current_date = start_time.date()

            # Nothing to split. An end date before the start date is also passed
            # through untouched rather than looping forever looking for it.
            if current_date >= end_date:
                data_by_day.append(dp)
                continue

            timezoneinfo = start_time.tzinfo
            segment_start = start_time
            while current_date < end_date:
                next_date = current_date + timedelta(days=1)
                next_midnight = datetime(next_date.year, next_date.month,
                                         next_date.day, tzinfo=timezoneinfo)
                data_by_day.append(
                    DataPoint(segment_start, next_midnight, dp.offset,
                              dp.sample))
                # The next segment starts exactly where this one ended, so the
                # pieces cover the original span without gaps.
                segment_start = next_midnight
                current_date = next_date

            # Remainder on the final day: midnight up to the original end time.
            data_by_day.append(
                DataPoint(segment_start, end_time, dp.offset, dp.sample))

        return data_by_day

Example #5

0

Show file

File: phone_screen_touch_features_all_app.py Project: msh-shiplu/CerebralCortex-DataAnalysis

    def get_screen_touch_rate(self, data: List[DataPoint], typing_episodes: List) -> List[DataPoint]:
        """
        Compute the number of screen touches per second of typing for one day.

        :param List(DataPoint) data: screen touch stream data points
        :param List(Tuple) typing_episodes: each item holds the start time at index 0 and the end time at index 1
                                            of a typing episode
        :return: A list with a single data point spanning the calendar day of the first touch, whose sample is the
                 number of touches inside the episodes divided by the summed episode length in seconds. None when
                 there is no data, no typing time or no touch inside an episode.
        :rtype: List(DataPoint)
        """
        if not data:
            return None

        touches = 0
        typing_seconds = 0
        for episode in typing_episodes:
            begin, finish = episode[0], episode[1]
            typing_seconds += (finish - begin).total_seconds()
            # A touch is counted once for every episode that contains it.
            touches += sum(1 for touch in data if begin <= touch.start_time <= finish)

        if touches == 0 or typing_seconds == 0:
            return None

        first = data[0]
        day_start = copy.deepcopy(first.start_time)
        day_start = day_start.replace(hour=0, minute=0, second=0, microsecond=0)
        day_end = datetime.datetime.combine(day_start.date(), datetime.time.max)
        day_end = day_end.replace(tzinfo=first.start_time.tzinfo)

        rate = touches / typing_seconds
        return [DataPoint(start_time=day_start, end_time=day_end, offset=first.offset, sample=rate)]

Example #6

0

Show file

def row_to_datapoint(row: str) -> "DataPoint":
    """
    Convert one row of mCerebrum's GZ-CSV format into what Cerebral Cortex expects.

    The row is split on its first two commas into a timestamp, an offset and the
    remaining values text. The timestamp is divided by 1000 before being turned
    into a datetime, and the offset is used as a number of milliseconds for the
    timezone of that datetime.

    The values text is decoded as JSON when possible, otherwise as a
    comma-separated list of floats, otherwise it is kept as the raw string.

    :param row: one line of the form "<timestamp>,<offset>,<values>"
    :return: DataPoint with a timezone-aware start time and the decoded sample
    :raises ValueError: if the row has fewer than three comma-separated fields
        or the timestamp/offset are not integers
    """
    ts, offset, values = row.split(',', 2)
    ts = int(ts) / 1000.0
    offset = int(offset)

    try:
        values = json.loads(values)
    except ValueError:  # json.JSONDecodeError is a ValueError
        try:
            # Also covers a single bare number that JSON rejects (e.g. "nan"),
            # because splitting a comma-free string yields one element.
            values = list(map(float, values.split(',')))
        except ValueError:
            # Neither JSON nor numeric: keep the raw text as the sample.
            pass

    timezone = datetime.timezone(datetime.timedelta(milliseconds=offset))
    ts = datetime.datetime.fromtimestamp(ts, timezone)
    return DataPoint(start_time=ts, sample=values)

Example #7

0

Show file

File: igtb_qualtrix_data_importer.py Project: nsaleheen/CerebralCortex-DataAnalysis

def save_point(user, value, start_time, end_time, offset, metadata,
               stream_name_suffix):
    """
    Wrap a single value in a one-point DataStream and save it through ``CC``.

    The stream name is ``metadata['name']`` followed by ``stream_name_suffix``.
    The stream identifier is a UUID3 derived from the stream name, the user and
    the string form of the metadata. Any exception raised while saving is
    printed and not re-raised.

    :param user: owner of the stream; concatenated into the identifier seed
    :param value: value stored as the only element of the sample list
    :param start_time: start time of the data point
    :param end_time: end time of the data point
    :param offset: offset of the data point
    :param metadata: mapping providing 'name', 'data_descriptor',
        'execution_context' and 'annotations'
    :param stream_name_suffix: text appended to the metadata name
    """
    point = DataPoint(start_time=start_time,
                      end_time=end_time,
                      offset=offset,
                      sample=[value])

    stream_name = metadata['name'] + stream_name_suffix
    identifier_seed = str(stream_name + user + str(metadata))
    stream_id = str(uuid.uuid3(uuid.NAMESPACE_DNS, identifier_seed))

    stream = DataStream(identifier=stream_id,
                        owner=user,
                        name=stream_name,
                        data_descriptor=metadata['data_descriptor'],
                        execution_context=metadata['execution_context'],
                        annotations=metadata['annotations'],
                        stream_type=1,
                        data=[point])

    try:
        CC.save_stream(stream, localtime=True)
    except Exception as error:
        print(error)

Example #8

0

Show file

    def get_data_yield(self, data: List[DataPoint], max_data_gap_threshold_seconds: float = 300) \
            -> Tuple[List[DataPoint], float]:
        """
        Describe, for the calendar day of the first data point, when data is present and when it is missing.

        The result is a series of data points whose sample is 1 for an interval with data and 0 for an
        interval without data, plus the total time with data expressed in hours.

        :param List(DataPoint) data: list of data points
        :param float max_data_gap_threshold_seconds: maximum allowed gap in seconds between two consecutive data points
        :return: The presence/absence intervals, and a one-element list holding a DataPoint that spans the day
                 and whose sample is the number of hours with data rounded to two decimals. Note that the second
                 element is a list even though the signature annotates it as float.
        :rtype: Tuple(List(DataPoint), List(DataPoint)) or Tuple(None, None)
        """
        if not data:
            return None, None

        # Day boundaries (time.min .. time.max) of the first point's date, in the first point's tzinfo.
        start_time = datetime.datetime.combine(data[0].start_time.date(), datetime.time.min)
        start_time = start_time.replace(tzinfo=data[0].start_time.tzinfo)
        end_time = datetime.datetime.combine(data[0].start_time.date(), datetime.time.max)
        end_time = end_time.replace(tzinfo=data[0].start_time.tzinfo)
        L = len(data)
        # `last` marks where the currently open "data present" interval begins.
        last = start_time
        yield_data = []
        data_duration = datetime.timedelta()

        # Leading gap: the first point starts later than the threshold after the start of the day.
        if (data[0].start_time - start_time).total_seconds() > max_data_gap_threshold_seconds:
            yield_data.append(DataPoint(start_time, data[0].start_time, data[0].offset, 0))
            # NOTE(review): the present interval resumes at the END time of the first point, not at its
            # start time — confirm this is intended.
            last = self.get_end_time(data[0])

        i = 1
        s = None
        t = None
        while i < L:
            # s and t are the end times of two consecutive points.
            s = self.get_end_time(data[i - 1])
            t = self.get_end_time(data[i])

            # Walk forward while consecutive end times are within the threshold.
            while i < L and (t - s).total_seconds() <= max_data_gap_threshold_seconds:
                i += 1
                if i < L:
                    s = t
                    t = self.get_end_time(data[i])

            # The inner loop stopped before the end of the list, so a gap larger than the threshold lies
            # between s and t: close the present interval at s and record the gap s..t.
            if i < len(data):
                yield_data.append(DataPoint(last, s, data[0].offset, 1))
                yield_data.append(DataPoint(s, t, data[0].offset, 0))
                data_duration += (s - last)
                last = data[i].start_time
                i += 1

        # Tail of the day. t stays None when there is a single data point, which selects the else branch.
        if t and (end_time - t).total_seconds() > max_data_gap_threshold_seconds:
            yield_data.append(DataPoint(last, t, data[0].offset, 1))
            yield_data.append(DataPoint(t, end_time, data[0].offset, 0))
            data_duration += (t - last)
        else:
            yield_data.append(DataPoint(last, end_time, data[0].offset, 1))
            data_duration += (end_time - last)

        # Total duration with data, converted from seconds to hours.
        total_duration_data = [
            DataPoint(start_time, end_time, data[0].offset, round(data_duration.total_seconds() / (60 * 60), 2))]
        return yield_data, total_duration_data

Example #9

0

Show file

File: phone_screen_touch_features_all_app.py Project: msh-shiplu/CerebralCortex-DataAnalysis

    def get_screen_touch_variance_hourly(self, data: List[DataPoint], typing_episodes: List) -> List[DataPoint]:
        """
        This method returns hourly variance of time between two consecutive touch in a typing episode. In case of
        multiple typing episode, variance is calculated for each typing episode and combined using standard formula
        to combine multiple variances.

        :param List(DataPoint) data: screen touch stream data points
        :param List(Tuple) typing_episodes: (start_time, end_time) for each item in the list, the starting and end time
                                            of a typing episode
        :return: A list of variances for each hour (if there is input data for this hour) of a day. None when fewer
                 than two data points are given.
        :rtype: List(DataPoint)
        """
        if len(data) <= 1:
            return None

        # Work on a copy so the caller's points keep their end times; every
        # touch is treated as an instant (end_time == start_time).
        combined_data = copy.deepcopy(data)
        for s in combined_data:
            s.end_time = s.start_time

        new_data = []
        day_start = combined_data[0].start_time.replace(hour=0, minute=0, second=0, microsecond=0)
        for h in range(24):
            start = day_start.replace(hour=h)
            # NOTE(review): the window ends 59 minutes after the hour, so touches in the last minute of
            # the hour are not included — confirm this is intended.
            end = start + datetime.timedelta(minutes=59)
            datalist = [d for d in combined_data if start <= d.start_time <= end]
            if len(datalist) <= 1:
                continue

            # One independent list per episode. ``[[]] * n`` would repeat the same list object n times
            # and merge all episodes into one.
            splitted_data = [[d for d in datalist if ep[0] <= d.start_time and d.end_time <= ep[1]]
                             for ep in typing_episodes]
            splitted_data = [episode for episode in splitted_data if len(episode) > 1]
            if not splitted_data:
                continue

            episode_data = [self.inter_event_time_list(episode) for episode in splitted_data]
            # Mean over all inter-touch times of the hour; episodes may differ in length, so the values are
            # flattened before averaging.
            Xc = np.mean(np.concatenate(episode_data))
            var = 0
            n = 0
            for L in episode_data:
                X = np.mean(L)
                V = np.var(L)
                # Combined variance: within-episode variance plus the squared shift of the episode mean.
                var += len(L) * (V + (X - Xc) * (X - Xc))
                n += len(L)
            if n == 0:
                continue
            var /= n
            if np.isnan(var):
                continue

            new_data.append(DataPoint(start_time=start, end_time=end, offset=combined_data[0].offset,
                                      sample=var))
        return new_data

Example #10

0

Show file

File: gpsfeature.py Project: amimithun/CerebralCortex-DataAnalysis

    def radius_of_gyration(self, centroiddatapoints: object) -> object:
        """
        Compute the time-weighted radius of gyration over the given centroid data points.

        Points whose ``sample[0]`` equals -1.0 are ignored. ``sample[1]`` is used as latitude and
        ``sample[2]`` as longitude. Each remaining point contributes its squared distance from the mean
        position, weighted by its duration in seconds.

        :param centroiddatapoints: DataPoint array of centroid stream
        :return: radius of gyration, or an empty list when no usable point exists
        :rtype: List(DataPoint) with a single element.
        """
        stays = [dp for dp in centroiddatapoints if float(dp.sample[0]) != -1.0]
        if not stays:
            return []

        lat_sum = 0
        lon_sum = 0
        for dp in stays:
            lat_sum += float(dp.sample[1])
            lon_sum += float(dp.sample[2])

        count = len(stays)
        mean_lattitude = lat_sum / count
        mean_longitude = lon_sum / count

        total_time = 0
        time_distance = 0
        for dp in stays:
            duration = (dp.end_time - dp.start_time).total_seconds()
            distance = self.haversine(float(dp.sample[2]), float(dp.sample[1]),
                                      mean_longitude, mean_lattitude)
            total_time += duration
            time_distance += duration * distance * distance

        # Without any elapsed time the weighted average is undefined; report 0.
        rad_of_gyration = sqrt(time_distance / total_time) if total_time > 0 else 0

        # The reported span comes from the unfiltered input.
        first, final = centroiddatapoints[0], centroiddatapoints[-1]
        return [DataPoint(first.start_time, final.end_time, first.offset,
                          rad_of_gyration)]

Example #11

0

Show file

def classify_activity(features: List[DataPoint], is_gravity) -> List[DataPoint]:
    """
    Predict an activity label for every feature data point.

    :param features: data points whose samples are passed to the model's ``predict``
    :param is_gravity: forwarded to ``get_activity_model`` to select the model
    :return: one DataPoint per input with the same times and offset and the
        prediction converted to a string as sample
    """
    model = get_activity_model(is_gravity)
    predictions = model.predict([point.sample for point in features])

    return [
        DataPoint(start_time=point.start_time, end_time=point.end_time,
                  offset=point.offset, sample=str(predictions[index]))
        for index, point in enumerate(features)
    ]

Example #12

0

Show file

File: utils.py Project: nesl/WorkDeskProximityDetection

def fill_missing_values(datapoints: List[DataPoint],
                        freq: float) -> List[DataPoint]:
    """
    Resample the datapoints onto a regular time grid using the assigned frequency.

    The grid starts at the first datapoint's start time, advances by
    ``1 / freq`` seconds and stops at the last datapoint's start time. Each grid
    point receives the offset and sample of the most recent datapoint whose
    start time is not after it.

    :param datapoints: datapoints to resample; assumed to be ordered by start time
    :param freq: number of output datapoints per second
    :return: the resampled datapoints (their end time is None), or
        ``datapoints`` unchanged when it is empty or ``freq`` is not positive
    """
    if not datapoints:
        return datapoints
    # A zero frequency has no interval and a negative one would step backwards
    # in time without ever reaching the end.
    if freq <= 0:
        return datapoints

    step = timedelta(seconds=1.0 / freq)
    end_t = datapoints[-1].start_time
    last_index = len(datapoints) - 1

    new_datapoints = list()
    t = datapoints[0].start_time
    j = 0
    while t <= end_t:
        # Skip every datapoint that has already started by time t so the
        # newest one is used even when several fall inside one grid interval.
        while j < last_index and datapoints[j + 1].start_time <= t:
            j += 1
        source = datapoints[j]
        new_datapoints.append(
            DataPoint(t, None, source.offset, source.sample))
        t += step

    return new_datapoints

Example #13

0

Show file

    def process_day_data(self, user_id: str, day: str, streams: dict):
        """
        Collect one day of raw wrist-sensor data for a user, derive a stress time series from it and store it.

        For every known raw stream name that is present in ``streams`` the raw data is loaded, rows whose
        sample does not have 20 values are dropped, and the rows are realigned with ``get_realigned_data``.
        The rows of all streams are then sorted, passed to ``get_stress_time_series`` and the result is
        stored through ``self.store_stream``. Any exception is logged and not re-raised.

        :param user_id: id of the user whose data is processed
        :param day: day to process, passed on to the data loader
        :param streams: mapping from stream name to a stream entry; the entries of the streams that were used
                        are passed on as input streams of the stored stream
        :return: None
        """

        raw_led_hrvp_lw = "RAW--CHARACTERISTIC_LED--org.md2k.motionsense--MOTION_SENSE_HRV_PLUS--LEFT_WRIST"
        raw_led_hrvp_rw = "RAW--CHARACTERISTIC_LED--org.md2k.motionsense--MOTION_SENSE_HRV_PLUS--RIGHT_WRIST"
        raw_led_hrv_lw = "RAW--CHARACTERISTIC_LED--org.md2k.motionsense--MOTION_SENSE_HRV--LEFT_WRIST"
        raw_led_hrv_rw = "RAW--CHARACTERISTIC_LED--org.md2k.motionsense--MOTION_SENSE_HRV--RIGHT_WRIST"
        raw_hrv_lw = "RAW--org.md2k.motionsense--MOTION_SENSE_HRV--LEFT_WRIST"
        raw_hrv_wr = "RAW--org.md2k.motionsense--MOTION_SENSE_HRV--RIGHT_WRIST"
        raw_stream_names = [raw_led_hrvp_lw, raw_led_hrvp_rw, raw_led_hrv_lw, raw_led_hrv_rw, raw_hrv_lw, raw_hrv_wr]
        ppg_data = []
        input_streams = []
        try:
            for rs in raw_stream_names:
                # Only streams the caller supplied are read.
                if rs not in streams:
                    continue
                data = utils.get_raw_data_by_stream_name(rs, user_id, day, self.CC, localtime=False)
                raw_data = []
                for d in data:
                    # A list sample must have exactly 20 values; otherwise the row is skipped.
                    if type(d.sample) is list and len(d.sample)!=20:
                        continue
                    if type(d.sample) is str:
                        # A string sample is parsed as comma-separated floats and must also yield 20 values.
                        st = list(map(float, d.sample.strip().split(",")))
                        if len(st)!=20:
                            continue
                        # Each row is [timestamp, offset, value_1 .. value_20].
                        # presumably `tm` is the standard `time` module — TODO confirm the import alias
                        tmp = [tm.mktime(d.start_time.timetuple()), d.offset]
                        tmp += st
                    else:
                        tmp = [tm.mktime(d.start_time.timetuple()), d.offset]
                        tmp += d.sample
                    raw_data.append(tmp)

                # NOTE(review): this returns from the whole method as soon as one present stream has no usable
                # rows, so later streams are never processed — confirm that `continue` was not intended.
                if not raw_data:
                    return None
                data = get_realigned_data(np.array(raw_data)).tolist()
                input_streams.append(streams[rs])
                ppg_data += data

            if not ppg_data:
                return
            # Sorting the rows orders them by their first column, the timestamp.
            ppg_data = np.array(sorted(ppg_data))
            # The offset of the earliest row is used for every output data point.
            offset = ppg_data[0, 1]
            stress_data = get_stress_time_series(ppg_data)
            data = []
            for d in stress_data:
                # NOTE(review): d[0] is divided by 1000 before conversion, which suggests millisecond
                # timestamps — confirm against get_stress_time_series.
                data.append(DataPoint(start_time=datetime.datetime.fromtimestamp(d[0]/1000), offset=offset, sample=[d[1]]))
            self.store_stream(filepath="stress-from-wrist.json",
                              input_streams=input_streams, user_id=user_id,
                              data=data, localtime=False)
        except Exception as e:
            # Failures are logged together with the user and day, then swallowed.
            self.CC.logging.log("user_id: "+ user_id + " day: " + day)
            # NOTE(review): this call passes two positional arguments while the other log calls pass one —
            # confirm the logger accepts that.
            self.CC.logging.log("Exception:", str(e))
            self.CC.logging.log(str(traceback.format_exc()))

Example #14

0

Show file

File: gpsfeature.py Project: amimithun/CerebralCortex-DataAnalysis

    def standard_deviation_of_displacements(self,
                                            datawithtransit: object) -> object:
        """
        Compute the standard deviation of the distances between consecutive centroid data points.

        Points whose ``sample[0]`` equals -1.0 are ignored. The distance between two neighbouring points is
        obtained from ``self.haversine`` using ``sample[2]`` and ``sample[1]`` of both points. The deviation is
        the square root of the mean squared difference from the mean distance.

        :param datawithtransit: DataPoint array of centroid stream
        :return: standard deviation of displacements, or an empty list when fewer than two usable points exist
        :rtype: List(DataPoint) with a single element.
        """
        stops = [dp for dp in datawithtransit if float(dp.sample[0]) != -1.0]

        # At least two points are needed to form one displacement.
        if len(stops) < 2:
            return []

        hops = [
            self.haversine(here.sample[2], here.sample[1],
                           there.sample[2], there.sample[1])
            for here, there in zip(stops, stops[1:])
        ]
        hop_count = len(stops) - 1

        summed = 0
        for hop in hops:
            summed = summed + hop
        mean_distance = summed / hop_count

        squared_error = 0
        for hop in hops:
            squared_error = squared_error + (hop - mean_distance)**2
        standard_deviation = sqrt(squared_error / hop_count)

        first, final = stops[0], stops[-1]
        return [DataPoint(first.start_time, final.end_time, first.offset,
                          standard_deviation)]

Example #15

0

Show file

File: gpsfeature.py Project: amimithun/CerebralCortex-DataAnalysis

    def maximum_distance_from_home(self, home_lattitude: object,
                                   home_longitude: object,
                                   centroiddata: object) -> object:
        """
        Find the largest distance between home and any centroid data point.

        Points whose ``sample[1]`` equals -1.0 are ignored. ``sample[1]`` is used as latitude and
        ``sample[2]`` as longitude; distances come from ``self.haversine``.

        :param home_lattitude: lattitude of home's location
        :param home_longitude: longitude of home's location
        :param centroiddata: list of centroid datapoints.
        :return: the maximum distance (0 if no distance exceeds 0), or an empty list when no usable point exists
        :rtype: List(DataPoint) with a single element.
        """
        stops = [dp for dp in centroiddata if float(dp.sample[1]) != -1.0]
        if not stops:
            return []

        home_lat = float(home_lattitude)
        home_lon = float(home_longitude)

        farthest = 0
        for dp in stops:
            distance = self.haversine(home_lon, home_lat,
                                      float(dp.sample[2]), float(dp.sample[1]))
            if farthest < distance:
                farthest = distance

        # The reported span comes from the unfiltered input.
        first, final = centroiddata[0], centroiddata[-1]
        return [DataPoint(first.start_time, final.end_time, first.offset,
                          farthest)]

Example #16

0

Show file

File: gpsfeature.py Project: amimithun/CerebralCortex-DataAnalysis

    def number_of_different_places(self, data: object) -> object:
        """
        Count the distinct places among the given centroid data points.

        Points whose ``sample[1]`` equals -1.0 are ignored. Two points are the same place when the string
        forms of both ``sample[1]`` and ``sample[2]`` match.

        :param data: DataPoint array of centroid stream
        :return: number of different places, or an empty list when no usable point exists
        :rtype: List(DataPoint) with a single element.
        """
        visited = set()
        for dp in data:
            if float(dp.sample[1]) == -1.0:
                continue
            # Keep the two coordinates as separate key parts: gluing the strings together lets different
            # places collide (e.g. "1.2" + "34.5" and "1.23" + "4.5").
            visited.add((str(dp.sample[1]), str(dp.sample[2])))

        if not visited:
            return []

        # The reported span comes from the unfiltered input.
        start_time = data[0].start_time
        end_time = data[-1].end_time
        offset = data[0].offset

        return [DataPoint(start_time, end_time, offset, len(visited))]

Example #17

0

Show file

File: gpsfeature.py Project: amimithun/CerebralCortex-DataAnalysis

    def mobility_places(self, data: object) -> object:
        """
        Build the list of places occupied in each 15-minute window of a day.

        Every window starts out as "MISSING". For each data point, the windows from the first window
        boundary at or after its start time up to the window containing its end time are set to the
        concatenated string forms of ``sample[1]`` and ``sample[2]``, unless ``sample[0]`` equals -1.0.

        :param data: DataPoint array of centroid stream
        :return: A single DataPoint (not wrapped in a list) spanning the first start time to the last end
                 time, whose sample is the list of window labels.
        :rtype: DataPoint
        """
        window_minutes = 15
        window_count = (24 * 60) // window_minutes
        mob_places = ["MISSING"] * window_count

        for dp in data:
            begin_minute = dp.start_time.hour * 60 + dp.start_time.minute
            first_window = ceil(begin_minute / window_minutes)
            finish_minute = dp.end_time.hour * 60 + dp.end_time.minute
            last_window = floor(finish_minute / window_minutes)

            for window in range(first_window, last_window + 1):
                if float(dp.sample[0]) != -1.0:
                    mob_places[window] = str(dp.sample[1]) + str(dp.sample[2])

        first, final = data[0], data[-1]
        return DataPoint(first.start_time, final.end_time, first.offset,
                         mob_places)

Example #18

0

Show file

File: gpsfeature.py Project: amimithun/CerebralCortex-DataAnalysis

    def maximum_distance_between_two_locations(self, data: object) -> object:
        """
        Find the largest distance between any two centroid data points.

        Points whose ``sample[1]`` equals -1.0 are ignored. ``sample[1]`` is used as latitude and
        ``sample[2]`` as longitude; distances come from ``self.haversine``.

        :param data: DataPoint array of centroid stream
        :return: the maximum pairwise distance (0 when there is only one usable point), or an empty list when
                 no usable point exists
        :rtype: List(DataPoint) with a single element.
        """
        coordinates = [(float(dp.sample[2]), float(dp.sample[1]))
                       for dp in data if float(dp.sample[1]) != -1.0]

        if not coordinates:
            return []

        max_dist_bet_two_locations = 0
        count = len(coordinates)
        for i in range(count):
            lon_i, lat_i = coordinates[i]
            # Start the inner index after i for every i so that all unordered pairs are compared, not only
            # the pairs that include the first point.
            for j in range(i + 1, count):
                lon_j, lat_j = coordinates[j]
                dist_bet_i_j = self.haversine(lon_i, lat_i, lon_j, lat_j)
                if dist_bet_i_j > max_dist_bet_two_locations:
                    max_dist_bet_two_locations = dist_bet_i_j

        # The reported span comes from the unfiltered input.
        start_time = data[0].start_time
        end_time = data[-1].end_time
        offset = data[0].offset

        return [DataPoint(start_time, end_time, offset,
                          max_dist_bet_two_locations)]

Example #19

0

Show file

    def setUpClass(self):
        """
        Prepare the fixtures: a PhoneFeatures instance, nine DataPoints that lie between roughly ten
        and one hours in the past, and two empty DataStreams that share one owner.
        """
        self.pp = PhoneFeatures()

        self.data = []
        for hours_back in range(10, 1, -1):
            now = datetime.datetime.now()
            begin = now - datetime.timedelta(hours=hours_back - .1)
            finish = now - datetime.timedelta(hours=hours_back - .9)
            # The loop value is also passed as the third positional argument of DataPoint.
            self.data.append(DataPoint(begin, finish, hours_back))

        owner = uuid.uuid4()

        self.phoneDataStream = DataStream(identifier=uuid.uuid4(),
                                          owner=owner)
        self.phoneDataStream.data = []

        self.smsDataStream = DataStream(identifier=uuid.uuid4(),
                                        owner=owner)
        self.smsDataStream.data = []

Example #20

0

Show file

File: gpsfeature.py Project: amimithun/CerebralCortex-DataAnalysis

    def total_distance_covered(self, data: object) -> object:
        """
        Sum the distances between consecutive centroid data points.

        Points whose ``sample[1]`` equals -1.0 are ignored. ``sample[1]`` is used as latitude and
        ``sample[2]`` as longitude; distances come from ``self.haversine``.

        :param data: DataPoint array of centroid stream
        :return: total distance covered (0 when there is only one usable point), or an empty list when no
                 usable point exists
        :rtype: List(DataPoint) with a single element.
        """
        stops = [dp for dp in data if float(dp.sample[1]) != -1.0]
        if not stops:
            return []

        total_distance = 0
        for before, after in zip(stops, stops[1:]):
            lat_from = float(before.sample[1])
            lon_from = float(before.sample[2])
            lat_to = float(after.sample[1])
            lon_to = float(after.sample[2])
            total_distance = total_distance + self.haversine(
                lon_from, lat_from, lon_to, lat_to)

        # The reported span comes from the unfiltered input.
        first, final = data[0], data[-1]
        return [DataPoint(first.start_time, final.end_time, first.offset,
                          total_distance)]

Example #21

0

Show file

File: gpsfeature.py Project: amimithun/CerebralCortex-DataAnalysis

    def available_data_in_time(self, data: object) -> object:
        """
        Sum the durations of the given data points in seconds.

        :param data: DataPoint array of centroid stream
        :return: available data in seconds, or an empty list when there is no data or the summed duration is
                 negative
        :rtype: List(DataPoint) with a single element.
        """
        # Without any data point there is no time span to report on.
        if len(data) == 0:
            return []

        total_time = 0
        for dp in data:
            total_time += (dp.end_time - dp.start_time).total_seconds()
        if total_time < 0:
            return []

        start_time = data[0].start_time
        end_time = data[-1].end_time
        offset = data[0].offset

        return [DataPoint(start_time, end_time, offset, total_time)]

Example #22

0

Show file

File: activity_classifier.py Project: nsaleheen/CerebralCortex-DataAnalysis

def classify_activity(features: List[DataPoint], is_gravity) -> List[DataPoint]:
    """
    Label every feature point with the activity predicted by the ML model.

    The model returned by ``get_activity_model(is_gravity)`` is asked once to
    predict over the samples of all feature points; each prediction is then
    stringified and attached to a new DataPoint that copies the timing and
    offset of the feature point it was computed from.

    :type is_gravity: bool
    :type features: List[DataPoint]
    :rtype: List[DataPoint]
    :param features: A set of features to run activity classification on
    :param is_gravity: Flag passed to ``get_activity_model`` to select the model
    :return: Labeled activities, one per input feature point
    """
    model = get_activity_model(is_gravity)
    predictions = model.predict([point.sample for point in features])

    return [
        DataPoint(start_time=point.start_time, end_time=point.end_time,
                  offset=point.offset, sample=str(predictions[idx]))
        for idx, point in enumerate(features)
    ]

Example #23

0

Show file

File: gpsfeature.py Project: amimithun/CerebralCortex-DataAnalysis

    def routine_index(self, places: object) -> object:
        """
        Compute one routine-index value for every entry of ``places``.

        For each entry, ``self.average_difference`` is evaluated against every
        other entry and the results are averaged over the
        ``len(places) - 1`` other entries.

        :param places: indexable sequence of entries exposing start_time,
                       end_time and offset
        :return: one DataPoint per entry (same start_time, end_time and
                 offset, routine index as sample); an empty list when fewer
                 than two entries are given
        :rtype: List(DataPoint)
        """
        count = len(places)
        if count < 2:
            return []

        results = []
        for idx in range(count):
            current = places[idx]

            accumulated = 0
            for other_idx in range(count):
                if other_idx == idx:
                    # An entry is never compared with itself.
                    continue
                accumulated = accumulated + self.average_difference(
                    current, places[other_idx])

            results.append(
                DataPoint(current.start_time, current.end_time,
                          current.offset, accumulated / (count - 1)))

        return results

Example #24

0

Show file

File: staying_time.py Project: amimithun/CerebralCortex-DataAnalysis

    def listing_all_staying_times(self, user_id: str, all_days: List[str]):
        """
        Label each work day's office staying time against the usual one.

        Every data point of the ``Working_Days_STREAM`` streams becomes a
        DataPoint whose sample starts with the staying time in minutes
        (clock minutes of end_time minus clock minutes of start_time).
        Staying times outside the median +/- scaled-MAD band are left out
        when the mean and standard deviation are computed (unless that would
        leave nothing). Each day is then tagged ``more_than_usual``,
        ``less_than_usual`` or ``usual_staying_time`` depending on whether it
        lies above, below or within one standard deviation of that mean,
        followed by the rounded-up excess/shortfall in minutes and a flag
        (1 for more-than-usual and usual, 0 for less-than-usual). The result
        is handed to ``self.store_stream``.

        :param str user_id: UUID of the stream owner
        :param List(str) all_days: All days of the user in the format 'YYYYMMDD'
        :return:
        """
        owner = self.__class__.__name__
        self.CC.logging.log('%s started processing for user_id %s' %
                            (owner, str(user_id)))

        labelled_days = []
        minutes_stayed = []
        for stream_id in self.CC.get_stream_id(user_id, Working_Days_STREAM):
            for day in all_days:
                stream = self.CC.get_stream(stream_id["identifier"], user_id,
                                            day, localtime=True)
                for work_dp in stream.data:
                    arrived = (work_dp.start_time.hour * 60
                               + work_dp.start_time.minute)
                    left = work_dp.end_time.hour * 60 + work_dp.end_time.minute
                    stayed = left - arrived
                    minutes_stayed.append(stayed)

                    day_dp = DataPoint(work_dp.start_time, work_dp.end_time,
                                       work_dp.offset, [])
                    day_dp.sample.append(stayed)
                    labelled_days.append(day_dp)

        if len(minutes_stayed) == 0:
            return

        # Outlier band: median +/- (median absolute deviation, scaled twice).
        center = np.median(minutes_stayed)
        abs_deviations = [abs(minutes - center) for minutes in minutes_stayed]
        cutoff = (np.median(abs_deviations)
                  * MEDIAN_ABSOLUTE_DEVIATION_MULTIPLIER
                  * OUTLIER_DETECTION_MULTIPLIER)
        inliers = [minutes for minutes in minutes_stayed
                   if minutes > (center - cutoff)
                   and minutes < (center + cutoff)]
        if len(inliers) == 0:
            # Everything was rejected; fall back to the unfiltered values.
            inliers = minutes_stayed

        usual_mean = np.mean(inliers)
        usual_std = np.std(inliers)
        upper_usual = usual_mean + usual_std
        lower_usual = usual_mean - usual_std

        for day_dp in labelled_days:
            stayed = day_dp.sample[0]
            if stayed > upper_usual:
                tail = ("more_than_usual",
                        math.ceil(stayed - upper_usual), 1)
            elif stayed < lower_usual:
                tail = ("less_than_usual",
                        math.ceil(lower_usual - stayed), 0)
            else:
                tail = ("usual_staying_time", 0, 1)
            for item in tail:
                day_dp.sample.append(item)

        try:
            if labelled_days:
                user_streams = self.CC.get_user_streams(user_id)
                for stream_name, stream_metadata in user_streams.items():
                    if stream_name != Working_Days_STREAM:
                        continue
                    print("Going to pickle the file: ", labelled_days)

                    self.store_stream(filepath="staying_time.json",
                                      input_streams=[stream_metadata],
                                      user_id=user_id,
                                      data=labelled_days,
                                      localtime=True)
                    break
        except Exception as e:
            print("Exception:", str(e))
            print(traceback.format_exc())

        self.CC.logging.log(
            '%s finished processing for user_id %s saved %d '
            'data points' %
            (owner, str(user_id), len(labelled_days)))

Example #25

0

Show file

File: sleep_duration_analysis.py Project: amimithun/CerebralCortex-DataAnalysis

    def listing_all_sleep_duration_analysis(self, user_id: str, all_days: List[str]):
        """
        Label each day's sleep duration against the user's usual duration.

        Every data point of the ``Sleep_Durations_STREAM`` streams becomes a
        DataPoint whose sample starts with that point's sleep duration.
        Durations outside the median +/- scaled-MAD band are left out when
        the mean and standard deviation are computed (unless that would leave
        nothing). Each day is then tagged ``more_than_usual``,
        ``less_than_usual`` or ``usual_sleep_duration`` depending on whether
        it lies above, below or within one standard deviation of that mean,
        followed by the deviation from the usual band and a flag (1 for
        more-than-usual and usual, 0 for less-than-usual). Deviations are in
        the same unit as the input durations (hours, per the original
        description). The result is handed to ``self.store_stream``.

        :param str user_id: UUID of the stream owner
        :param List(str) all_days: All days of the user in the format 'YYYYMMDD'
        :return:
        """
        owner = self.__class__.__name__
        self.CC.logging.log('%s started processing for user_id %s' %
                            (owner, str(user_id)))

        labelled_days = []
        durations = []
        for stream_id in self.CC.get_stream_id(user_id,
                                               Sleep_Durations_STREAM):
            for day in all_days:
                stream = self.CC.get_stream(stream_id["identifier"], user_id,
                                            day)
                for sleep_dp in stream.data:
                    slept = sleep_dp.sample
                    durations.append(slept)
                    labelled_days.append(
                        DataPoint(sleep_dp.start_time, sleep_dp.end_time,
                                  sleep_dp.offset, [slept]))

        if len(durations) == 0:
            return

        # Outlier band: median +/- (median absolute deviation, scaled twice).
        center = np.median(durations)
        abs_deviations = [abs(slept - center) for slept in durations]
        cutoff = (np.median(abs_deviations)
                  * MEDIAN_ABSOLUTE_DEVIATION_MULTIPLIER
                  * OUTLIER_DETECTION_MULTIPLIER)
        inliers = [slept for slept in durations
                   if slept > (center - cutoff) and slept < (center + cutoff)]
        if len(inliers) == 0:
            # Everything was rejected; fall back to the unfiltered values.
            inliers = durations

        usual_mean = np.mean(inliers)
        usual_std = np.std(inliers)
        upper_usual = usual_mean + usual_std
        lower_usual = usual_mean - usual_std

        for day_dp in labelled_days:
            slept = day_dp.sample[0]
            if slept > upper_usual:
                tail = ("more_than_usual", slept - upper_usual, 1)
            elif slept < lower_usual:
                tail = ("less_than_usual", lower_usual - slept, 0)
            else:
                tail = ("usual_sleep_duration", 0, 1)
            for item in tail:
                day_dp.sample.append(item)

        try:
            if len(labelled_days) > 0:
                user_streams = self.CC.get_user_streams(user_id)
                if user_streams:
                    for stream_name, stream_metadata in user_streams.items():
                        if stream_name != Sleep_Durations_STREAM:
                            continue
                        self.store_stream(
                            filepath="sleep_duration_analysis.json",
                            input_streams=[stream_metadata],
                            user_id=user_id,
                            data=labelled_days)
                        break
        except Exception as e:
            print("Exception:", str(e))
            print(traceback.format_exc())

        self.CC.logging.log('%s finished processing for user_id %s saved %d '
                            'data points' %
                            (owner, str(user_id), len(labelled_days)))

Example #26

0

Show file

File: expected_staying_time.py Project: Soujanya27/CerebralCortex-DataAnalysis

    def listing_all_expected_staying_times(self, user_id, all_days):
        """
        Label each work day's staying time against two expected durations.

        Staying times (clock minutes of end_time minus clock minutes of
        start_time) are read from the ``Working_Days_STREAM`` streams. Their
        outlier-filtered mean is snapped to half-hour marks: the conservative
        expectation is the next half-hour mark above the mean's
        hour/minute, the liberal expectation the half-hour mark at or below
        it. Two lists of DataPoints are produced and stored separately; in
        each, the sample holds the staying time, a label
        (``more_than_expected_*``, ``less_than_expected_*`` or
        ``in_expected_*``) and the rounded-up distance in minutes from the
        corresponding expectation.

        :param user_id: UUID of the stream owner
        :param all_days: All days of the user to process
        :return:
        """
        def _tag(sample, minutes, expected, labels):
            # Append (label, distance from `expected`) to `sample`.
            more_label, less_label, in_label = labels
            if minutes > expected:
                sample.append(more_label)
                sample.append(math.ceil(minutes - expected))
            elif minutes < expected:
                sample.append(less_label)
                sample.append(math.ceil(expected - minutes))
            elif minutes == expected:
                sample.append(in_label)
                sample.append(0)

        owner = self.__class__.__name__
        self.CC.logging.log('%s started processing for user_id %s' %
                            (owner, str(user_id)))

        conservative_points = []
        liberal_points = []
        minutes_stayed = []
        for stream_id in self.CC.get_stream_id(user_id, Working_Days_STREAM):
            if stream_id["identifier"] == '11e0934a-05fd-36d7-903a-9123a2e9f19b':
                print("This is that stream:")
                continue
            for day in all_days:
                stream = self.CC.get_stream(stream_id["identifier"], user_id,
                                            day)
                for work_dp in stream.data:
                    arrived = (work_dp.start_time.hour * 60
                               + work_dp.start_time.minute)
                    left = work_dp.end_time.hour * 60 + work_dp.end_time.minute
                    stayed = left - arrived
                    minutes_stayed.append(stayed)

                    day_dp = DataPoint(work_dp.start_time, work_dp.end_time,
                                       work_dp.offset, [])
                    day_dp.sample.append(stayed)
                    conservative_points.append(day_dp)

        if len(minutes_stayed) == 0:
            return

        # Outlier band: median +/- (median absolute deviation, scaled twice).
        center = np.median(minutes_stayed)
        abs_deviations = [abs(minutes - center) for minutes in minutes_stayed]
        cutoff = (np.median(abs_deviations)
                  * MEDIAN_ABSOLUTE_DEVIATION_MULTIPLIER
                  * OUTLIER_DETECTION_MULTIPLIER)
        inliers = [minutes for minutes in minutes_stayed
                   if (center - cutoff) < minutes < (center + cutoff)]
        if len(inliers) == 0:
            # Everything was rejected; fall back to the unfiltered values.
            inliers = minutes_stayed

        usual_minutes = np.mean(inliers)
        usual_minute_part = int(usual_minutes % 60)
        usual_hour_part = int(usual_minutes / 60)
        if usual_minute_part < 30:
            conservative_time = usual_hour_part * 60 + 30
            liberal_time = usual_hour_part * 60 + 0
        else:
            conservative_time = (usual_hour_part + 1) * 60 + 0
            liberal_time = usual_hour_part * 60 + 30

        conservative_labels = ("more_than_expected_conservative_time",
                               "less_than_expected_conservative_time",
                               "in_expected_conservative_time")
        liberal_labels = ("more_than_expected_liberal_time",
                          "less_than_expected_liberal_time",
                          "in_expected_liberal_time")
        for conservative_dp in conservative_points:
            stayed = conservative_dp.sample[0]
            liberal_dp = DataPoint(conservative_dp.start_time,
                                   conservative_dp.end_time,
                                   conservative_dp.offset, [])

            _tag(conservative_dp.sample, stayed, conservative_time,
                 conservative_labels)

            liberal_dp.sample.append(stayed)
            _tag(liberal_dp.sample, stayed, liberal_time, liberal_labels)
            liberal_points.append(liberal_dp)

        # Store the conservative list first, then the liberal one; each
        # attempt is followed by its own "finished" log entry.
        outputs = (("expected_conservative_staying_time.json",
                    conservative_points),
                   ("expected_liberal_staying_time.json", liberal_points))
        for filepath, points in outputs:
            try:
                if len(points):
                    user_streams = self.CC.get_user_streams(user_id)
                    for stream_name, stream_metadata in user_streams.items():
                        if stream_name != Working_Days_STREAM:
                            continue
                        self.store_stream(
                            filepath=filepath,
                            input_streams=[stream_metadata],
                            user_id=user_id,
                            data=points)
                        break
            except Exception as e:
                print("Exception:", str(e))
                print(traceback.format_exc())
            self.CC.logging.log(
                '%s finished processing for user_id %s saved %d '
                'data points' %
                (owner, str(user_id), len(points)))

Example #27

0

Show file

    def listing_all_expected_arrival_times_from_beacon(self, user_id: str, all_days: List[str]):
        """
        Label each work day's arrival time against two expected arrival times.

        Arrival times (clock minutes of start_time) are read from the
        ``Working_Days_STREAM`` streams returned by
        ``self.get_latest_stream_id``. Their outlier-filtered mean is snapped
        to half-hour marks: the conservative expectation is the half-hour
        mark at or below the mean's hour/minute, the liberal expectation the
        next half-hour mark above it. Two lists of DataPoints are produced
        and stored separately; in each, the sample holds the arrival
        time-of-day, a label (``after_expected_*``, ``before_expected_*`` or
        ``in_expected_*``), the rounded-up distance in minutes from the
        corresponding expectation and a flag (0 for after, 1 for before and
        in-expected).

        :param str user_id: UUID of the stream owner
        :param List(str) all_days: All days of the user in the format 'YYYYMMDD'
        :return:
        """
        def _tag(sample, minutes, expected, labels):
            # Append (label, distance from `expected`, flag) to `sample`.
            after_label, before_label, in_label = labels
            if minutes > expected:
                sample.append(after_label)
                sample.append(math.ceil(minutes - expected))
                sample.append(0)
            elif minutes < expected:
                sample.append(before_label)
                sample.append(math.ceil(expected - minutes))
                sample.append(1)
            elif minutes == expected:
                sample.append(in_label)
                sample.append(0)
                sample.append(1)

        owner = self.__class__.__name__
        self.CC.logging.log('%s started processing for user_id %s' %
                            (owner, str(user_id)))

        conservative_points = []
        liberal_points = []
        arrival_minutes = []
        for stream_id in self.get_latest_stream_id(user_id,
                                                   Working_Days_STREAM):
            for day in all_days:
                stream = self.CC.get_stream(stream_id["identifier"], user_id,
                                            day, localtime=True)
                for work_dp in stream.data:
                    arrival_minutes.append(
                        work_dp.start_time.hour * 60
                        + work_dp.start_time.minute)
                    conservative_points.append(
                        DataPoint(work_dp.start_time, work_dp.end_time,
                                  work_dp.offset, []))

        if len(arrival_minutes) == 0:
            return

        # Outlier band: median +/- (median absolute deviation, scaled twice).
        center = np.median(arrival_minutes)
        abs_deviations = [abs(minutes - center) for minutes in arrival_minutes]
        cutoff = (np.median(abs_deviations)
                  * MEDIAN_ABSOLUTE_DEVIATION_MULTIPLIER
                  * OUTLIER_DETECTION_MULTIPLIER)
        inliers = [minutes for minutes in arrival_minutes
                   if (center - cutoff) < minutes < (center + cutoff)]
        if len(inliers) == 0:
            # Everything was rejected; fall back to the unfiltered values.
            inliers = arrival_minutes

        usual_minutes = np.mean(inliers)
        usual_minute_part = int(usual_minutes % 60)
        usual_hour_part = int(usual_minutes / 60)
        if usual_minute_part < 30:
            conservative_time = usual_hour_part * 60 + 0
            liberal_time = usual_hour_part * 60 + 30
        else:
            conservative_time = usual_hour_part * 60 + 30
            liberal_time = (usual_hour_part + 1) * 60 + 0

        conservative_labels = ("after_expected_conservative_time",
                               "before_expected_conservative_time",
                               "in_expected_conservative_time")
        liberal_labels = ("after_expected_liberal_time",
                          "before_expected_liberal_time",
                          "in_expected_liberal_time")
        for conservative_dp in conservative_points:
            begin = conservative_dp.start_time
            arrived = begin.hour * 60 + begin.minute
            liberal_dp = DataPoint(begin, conservative_dp.end_time,
                                   conservative_dp.offset, [])

            conservative_dp.sample.append(begin.time())
            _tag(conservative_dp.sample, arrived, conservative_time,
                 conservative_labels)

            liberal_dp.sample.append(begin.time())
            _tag(liberal_dp.sample, arrived, liberal_time, liberal_labels)
            liberal_points.append(liberal_dp)

        # Store the conservative list first, then the liberal one; each
        # attempt is followed by its own "finished" log entry.
        outputs = (("expected_conservative_arrival_time_from_beacon.json",
                    conservative_points),
                   ("expected_liberal_arrival_time_from_beacon.json",
                    liberal_points))
        for filepath, points in outputs:
            try:
                if len(points):
                    user_streams = self.CC.get_user_streams(user_id)
                    for stream_name, stream_metadata in user_streams.items():
                        if stream_name != Working_Days_STREAM:
                            continue
                        self.store_stream(filepath=filepath,
                                          input_streams=[stream_metadata],
                                          user_id=user_id,
                                          data=points, localtime=True)
                        break
            except Exception as e:
                print("Exception:", str(e))
                print(traceback.format_exc())
            self.CC.logging.log(
                '%s finished processing for user_id %s saved %d '
                'data points' %
                (owner, str(user_id), len(points)))

Example #28

0

Show file

    def listing_all_arrival_times_from_beacon(self, user_id, all_days):
        """
        Label each work day's arrival time against the user's usual one.

        Arrival times (clock minutes of start_time) are read from the
        ``Working_Days_STREAM`` streams. Arrival times outside the
        median +/- scaled-MAD band are left out when the mean and standard
        deviation are computed (unless that would leave nothing). Each day's
        sample then holds the arrival time-of-day, a label
        (``after_usual_time``, ``before_usual_time`` or ``usual_time``
        depending on whether it lies above, below or within one standard
        deviation of that mean) and the rounded-up distance in minutes from
        the usual band. The result is handed to ``self.store_stream``.

        :param user_id: UUID of the stream owner
        :param all_days: All days of the user to process
        :return:
        """
        owner = self.__class__.__name__
        self.CC.logging.log('%s started processing for user_id %s' %
                            (owner, str(user_id)))

        labelled_days = []
        arrival_minutes = []
        for stream_id in self.CC.get_stream_id(user_id, Working_Days_STREAM):
            for day in all_days:
                stream = self.CC.get_stream(stream_id["identifier"], user_id,
                                            day)
                for work_dp in stream.data:
                    print(work_dp)
                    arrival_minutes.append(
                        work_dp.start_time.hour * 60
                        + work_dp.start_time.minute)
                    labelled_days.append(
                        DataPoint(work_dp.start_time, work_dp.end_time,
                                  work_dp.offset, []))

        if len(arrival_minutes) == 0:
            return

        # Outlier band: median +/- (median absolute deviation, scaled twice).
        center = np.median(arrival_minutes)
        abs_deviations = [abs(minutes - center) for minutes in arrival_minutes]
        cutoff = (np.median(abs_deviations)
                  * MEDIAN_ABSOLUTE_DEVIATION_MULTIPLIER
                  * OUTLIER_DETECTION_MULTIPLIER)
        inliers = [minutes for minutes in arrival_minutes
                   if minutes > (center - cutoff)
                   and minutes < (center + cutoff)]
        if len(inliers) == 0:
            # Everything was rejected; fall back to the unfiltered values.
            inliers = arrival_minutes

        usual_mean = np.mean(inliers)
        usual_std = np.std(inliers)
        upper_usual = usual_mean + usual_std
        lower_usual = usual_mean - usual_std

        for day_dp in labelled_days:
            begin = day_dp.start_time
            arrived = begin.hour * 60 + begin.minute
            day_dp.sample.append(begin.time())
            if arrived > upper_usual:
                tail = ("after_usual_time", math.ceil(arrived - upper_usual))
            elif arrived < lower_usual:
                tail = ("before_usual_time", math.ceil(lower_usual - arrived))
            else:
                tail = ("usual_time", 0)
            for item in tail:
                day_dp.sample.append(item)

        try:
            if labelled_days:
                user_streams = self.CC.get_user_streams(user_id)
                for stream_name, stream_metadata in user_streams.items():
                    if stream_name != Working_Days_STREAM:
                        continue
                    print("Going to pickle the file: ", labelled_days)

                    self.store_stream(filepath="arrival_time_from_beacon.json",
                                      input_streams=[stream_metadata],
                                      user_id=user_id,
                                      data=labelled_days)
                    break
        except Exception as e:
            print("Exception:", str(e))
            print(traceback.format_exc())

        self.CC.logging.log('%s finished processing for user_id %s saved %d '
                            'data points' %
                            (owner, str(user_id), len(labelled_days)))

Example #29

0

Show file

    def listing_all_work_days(self, user_id: str, all_days: List[str]):
        """
        Produce and save the list of work days.

        Work days are derived from the participant's semantic GPS location
        data points whose sample is 'work' (case-insensitive). For each
        calendar day, the start_time of the first such point becomes the work
        day's start time and the end_time of the last such point becomes its
        end time. Two parallel lists are stored: one whose samples are the
        string 'Office' ("working_days.json") and one whose samples are the
        integer 1 ("working_days_ems.json").

        :param str user_id: UUID of the stream owner
        :param List(str) all_days: All days of the user in the format 'YYYYMMDD'
        :return:
        """

        self.CC.logging.log('%s started processing for user_id %s' %
                            (self.__class__.__name__, str(user_id)))
        work_data = []
        work_data_ems = []
        location_data = []
        stream_ids = self.CC.get_stream_id(
            user_id, GPS_EPISODES_AND_SEMANTIC_lOCATION_STREAM)
        current_day = None  # in beginning current day is null
        # Collect the location data points of every stream and every day
        # into one flat list.
        for stream_id in stream_ids:
            for day in all_days:
                location_data_stream = \
                    self.CC.get_stream(stream_id["identifier"], user_id, day, localtime=True)
                location_data += location_data_stream.data
        # De-duplicate, then restore chronological order (set() loses order).
        # NOTE(review): presumably DataPoint defines __hash__/__eq__ by
        # value; otherwise this only removes identical objects - confirm.
        location_data = list(set(location_data))
        location_data.sort(key=lambda x: x.start_time)
        for data in location_data:
            print(data)
            if data.sample.lower() != "work":
                # only the data marked as Work are needed
                continue

            d = DataPoint(data.start_time, data.end_time, data.offset,
                          data.sample)

            if d.start_time.date() != current_day:
                '''
                when the day in d.start_time.date() is not equal
                current_day that means its a new day.
                '''
                if current_day:
                    # Close out the previous day using the start/end times
                    # accumulated so far.
                    # NOTE(review): the offset of this entry comes from
                    # `data`, i.e. the first work point of the NEW day, not
                    # from a point of the day being closed - confirm this is
                    # intended.
                    temp = DataPoint(data.start_time, data.end_time,
                                     data.offset, data.sample)
                    temp.start_time = work_start_time
                    temp.end_time = work_end_time
                    temp.sample = 'Office'
                    work_data.append(temp)
                    work_data_ems.append(
                        DataPoint(temp.start_time, temp.end_time, temp.offset,
                                  1))
                # The first work point of a day defines that day's start.
                work_start_time = d.start_time

                # save the new day as current day
                current_day = d.start_time.date()

            # Every work point pushes the day's end time forward, so after
            # the last work point of a day this holds that point's end_time.
            work_end_time = d.end_time
        # Flush the last open day (the loop only closes a day when the next
        # one starts).
        if current_day:
            # NOTE(review): `data` here is the last element iterated by the
            # loop above, which may be a non-work point; only its offset
            # survives because start_time, end_time and sample are
            # overwritten below - confirm the offset source is intended.
            temp = DataPoint(data.start_time, data.end_time, data.offset,
                             data.sample)
            temp.start_time = work_start_time
            temp.end_time = work_end_time
            temp.sample = 'Office'
            work_data.append(temp)
            work_data_ems.append(
                DataPoint(temp.start_time, temp.end_time, temp.offset, 1))

        try:
            if len(work_data):
                streams = self.CC.get_user_streams(user_id)
                # Store both outputs against the metadata of the input
                # location stream, then stop looking.
                for stream_name, stream_metadata in streams.items():
                    if stream_name == GPS_EPISODES_AND_SEMANTIC_lOCATION_STREAM:
                        self.store_stream(filepath="working_days.json",
                                          input_streams=[stream_metadata],
                                          user_id=user_id,
                                          data=work_data,
                                          localtime=True)
                        self.store_stream(filepath="working_days_ems.json",
                                          input_streams=[stream_metadata],
                                          user_id=user_id,
                                          data=work_data_ems,
                                          localtime=True)
                        break
        except Exception as e:
            # Best effort: a storage failure is printed, not propagated.
            print("Exception:", str(e))
            print(traceback.format_exc())
        self.CC.logging.log(
            '%s finished processing for user_id %s saved %d '
            'data points' %
            (self.__class__.__name__, str(user_id), len(work_data)))

Example #30

0

Show file

    def listing_all_gps_location_daywise(self, user_id: str,
                                         all_days: List[str]):
        """
        Produce and save the GPS locations of a participant on a per-day basis.

        For every stream id returned for
        GPS_EPISODES_AND_SEMANTIC_lOCATION_STREAM and every requested day, the
        stream's data points are collected. A data point whose start and end
        fall on different calendar dates is split into one data point per
        covered date:

        * first date:   original start_time -> 23:59:59.999999 of that date
        * middle dates: 00:00:00 -> 23:59:59.999999 of each date
        * last date:    00:00:00 of that date -> original end_time

        Generated day boundaries carry the tzinfo of the original start_time;
        offset and sample are copied unchanged. Data points that start and end
        on the same date are kept as they are. The collected points are stored
        through ``self.store_stream`` with ``gps_location_daywise.json``.

        :param str user_id: UUID of the stream owner
        :param List(str) all_days: All days of the user in the format 'YYYYMMDD'
        """

        self.CC.logging.log('%s started processing for user_id %s' %
                            (self.__class__.__name__, str(user_id)))
        gps_data = []
        one_day = timedelta(days=1)
        stream_ids = self.CC.get_stream_id(
            user_id, GPS_EPISODES_AND_SEMANTIC_lOCATION_STREAM)
        for stream_id in stream_ids:

            for day in all_days:
                location_data_stream = self.CC.get_stream(
                    stream_id["identifier"], user_id, day, localtime=False)

                # set() is presumably meant to drop duplicate data points;
                # this relies on how DataPoint hashes/compares - TODO confirm.
                for data in set(location_data_stream.data):
                    first_day = data.start_time.date()
                    last_day = data.end_time.date()

                    if first_day >= last_day:
                        # Same-day data point: nothing to split.
                        # A malformed point that ends on an earlier date than
                        # it starts is also kept untouched; stepping forward
                        # one day at a time would never reach its end date.
                        gps_data.append(data)
                        continue

                    tzinfo = data.start_time.tzinfo
                    current_day = first_day
                    while current_day <= last_day:
                        if current_day == first_day:
                            segment_start = data.start_time
                        else:
                            segment_start = datetime.combine(
                                current_day, time.min).replace(tzinfo=tzinfo)

                        if current_day == last_day:
                            segment_end = data.end_time
                        else:
                            segment_end = datetime.combine(
                                current_day, time.max).replace(tzinfo=tzinfo)

                        gps_data.append(DataPoint(segment_start, segment_end,
                                                  data.offset, data.sample))
                        current_day += one_day

        try:
            if gps_data:
                streams = self.CC.get_user_streams(user_id)
                for stream_name, stream_metadata in streams.items():
                    if stream_name == GPS_EPISODES_AND_SEMANTIC_lOCATION_STREAM:
                        self.store_stream(filepath="gps_location_daywise.json",
                                          input_streams=[stream_metadata],
                                          user_id=user_id,
                                          data=gps_data)
                        # Only the first matching stream's metadata is used.
                        break
        except Exception as e:
            # Best-effort storage: report the failure and fall through to the
            # final log line. The message is passed as ONE string, like every
            # other logging call here, so the exception text is not taken for
            # a second positional argument of the logger.
            self.CC.logging.log("Exception: " + str(e))
            self.CC.logging.log(traceback.format_exc())

        self.CC.logging.log(
            '%s finished processing for user_id %s saved %d '
            'data points' %
            (self.__class__.__name__, str(user_id), len(gps_data)))