Ejemplo n.º 1
0
def plot_cdf_new(data_abm, data_chad, fid, title, xlabel, do_periodic=False):
    """
    Draw the cumulative distribution function (CDF) of the ABM data against the CHAD data \
    for a given activity.

    :param numpy.ndarray data_abm: the ABM data
    :param numpy.ndarray data_chad: the CHAD data
    :param int fid: the figure identifier
    :param str title: the title of the figure
    :param str xlabel: the label of the x-axis
    :param bool do_periodic: if True, both data sets go through ``mg.to_periodic()`` before \
    plotting (a [-12, 12) time scale, useful for activities that may occur over midnight); \
    if False, the data are plotted as given

    :return: the figure of the CDF
    :rtype: matplotlib.figure.Figure
    """

    # the figure is created (or selected) first, so it is returned even if nothing is drawn
    fig = plt.figure(num=fid)

    # optionally re-express both data sets on the periodic time scale
    d_abm = mg.to_periodic(data_abm) if do_periodic else data_abm
    d_chad = mg.to_periodic(data_chad) if do_periodic else data_chad

    # nothing to draw when the ABM data set is empty
    if data_abm.size == 0:
        return fig

    plot_cdf(d_abm, d_chad, xlabel=xlabel, title=title)

    return fig
Ejemplo n.º 2
0
    def is_workday(self, p):
        """
        Tell whether or not the sleep event resembles that of a person sleeping for a workday.

        :param p: the person of interest; ``p.socio.job`` must provide ``is_employed`` and \
        ``work_days``

        :return: True, if the sleep event resembles a workday. False, otherwise.
        :rtype: bool
        """

        # by default (i.e., for a person without a job) the sleep is treated as workday sleep
        if not p.socio.job.is_employed:
            return True

        # the time of day centered around midnight [-12 * HOUR_2_MIN, 12 * HOUR_2_MIN)
        t_periodic = mg.to_periodic(self.clock.time_of_day, do_hours=False)

        # the day of the week for today and for tomorrow (wrapping around after 7 days)
        today = self.clock.day_of_week
        tomorrow = (today + 1 + 7) % 7

        # falling asleep before midnight: the day that matters is tomorrow.
        # falling asleep at or after midnight: the day that matters is already today.
        relevant_day = tomorrow if t_periodic < 0 else today

        return relevant_day in p.socio.job.work_days
Ejemplo n.º 3
0
def plot_activity_cdfs(d, keys):
    """
    Plot, for each requested activity, the empirical cumulative distribution function (CDF) of \
    the start time, end time, and duration found in the simulation diary.

    One 2 x 2 figure is created per activity; three of its four panels are used.

    :param diary.Diary d: the results of the simulation
    :param list keys: list of activities to graph
    :return:
    """

    # number of samples for the empirical CDF
    num_samples = 1e3

    # (label, color, panel position) for each quantity that is plotted
    panels = (
        ('start', 'blue', (0, 0)),
        ('end', 'green', (0, 1)),
        ('duration', 'red', (1, 0)),
    )

    for k in keys:

        # the diary entries for this activity only
        df = d.df[d.df.act == k]

        # one figure per activity, titled with the activity name
        fig, axes = plt.subplots(2, 2)
        fig.suptitle(activity.INT_2_STR[k])

        # the data for the panels, in the same order as the panel descriptions
        series = (df.start.values, df.end.values, df.dt.values)

        for (label, color, (row, col)), data in zip(panels, series):

            ax = axes[row, col]

            # sleep can start before midnight, so its start time is shown in periodic time
            if label == 'start' and k == mg.KEY_SLEEP:
                data = mg.to_periodic(data, do_hours=True)

            # the empirical CDF of the data
            x, y = mg.get_ecdf(data, num_samples)

            # draw the curve, then label the x-axis and add the legend
            ax.plot(x, y, color=color, label=label)
            ax.set_xlabel('hours')
            ax.legend(loc='best')

    return
Ejemplo n.º 4
0
def analyze_sleep(data):
    """
    Process the CHAD sleep records and return their statistics, after filtering them for use \
    by ABMHAP for the sleep activity.

    The steps are: load the raw sleep records, merge records split across midnight, apply the \
    periodicity assumption, drop records whose start, end, or duration fall outside the \
    accepted CHAD bounds, and compute the moments of what remains.

    :param chad.CHAD_RAW data: the raw CHAD data

    :return: the statistical data on CHAD sleep data
    :rtype: dictionary
    """

    # the CHAD events data
    chad_events = data.events

    # load the raw sleep records
    print('loading sleep data...')
    raw_sleep = data.activity_times(chad_events, chad_code.SLEEP)

    # merge data across one day and the next (for events occurring over midnight)
    print('calculating merged data...')
    merged_sleep = merge(raw_sleep)

    # periodicity assumption
    print('calculating the periodicity assumption...')
    period = periodicity_sleep(merged_sleep)

    # records whose start time (in periodic time) is within the accepted bounds
    start_ok = (mg.to_periodic(period.start) >= chad.SLEEP_START_MIN) \
        & (mg.to_periodic(period.start) <= chad.SLEEP_START_MAX)

    # records whose end time is within the accepted bounds
    end_ok = (period.end >= chad.SLEEP_END_MIN) & (period.end <= chad.SLEEP_END_MAX)

    # records whose duration is within the accepted bounds
    dt_ok = (period.dt >= chad.SLEEP_DT_MIN) & (period.dt <= chad.SLEEP_DT_MAX)

    # a record is kept only if all three quantities are acceptable
    keep = start_ok & end_ok & dt_ok

    print('calculating the moments...')

    # analyze the statistics of the accepted sleep events
    return get_moments(period[keep], start_periodic=True)
Ejemplo n.º 5
0
def plot_activity_histograms(d, keys):
    """
    Plot, for each requested activity, histograms of the start time, end time, and duration \
    found in the simulation diary.

    One 2 x 2 figure is created per activity; three of its four panels are used.

    :param diary.Diary d: the results of the simulation
    :param list keys: list of activities to graph
    :return:
    """

    # the number of bins (set to 24 to reflect 24 hours in a day)
    num_bins = 24

    # the labels and colors of the panels, in plotting order
    labels = ('start', 'end', 'duration')
    colors = ('blue', 'green', 'red')

    for k in keys:

        # the diary entries for this activity only
        activity_df = d.df[d.df.act == k]

        # one figure per activity, titled with the activity name
        fig, axes = plt.subplots(2, 2)
        fig.suptitle(activity.INT_2_STR[k])

        # the panels that are used and the data drawn in each of them
        used_axes = (axes[0, 0], axes[0, 1], axes[1, 0])
        values = (activity_df.start.values, activity_df.end.values, activity_df.dt.values)

        for n, label in enumerate(labels):

            ax, data = used_axes[n], values[n]

            # sleep can start before midnight, so its start time is shown in periodic time
            if label == 'start' and k == mg.KEY_SLEEP:
                data = mg.to_periodic(data, do_hours=True)

            # draw the histogram, then label the x-axis and add the legend
            ax.hist(data, bins=num_bins, color=colors[n], label=label)
            ax.set_xlabel('hours')
            ax.legend(loc='best')

    return
Ejemplo n.º 6
0
def get_end_date(date, start, end):
    """
    This function finds the date that an activity ends.

    An activity ends on the day after it starts only when it starts before midnight and ends \
    at or after midnight; otherwise it ends on the day it starts.

    :param date: the date the activities start (documented as numpy.ndarray of \
    datetime.timedelta; since a timedelta is added to it, calendar dates look more likely -- \
    TODO confirm against callers)
    :param numpy.ndarray start: the start time of the activities
    :param numpy.ndarray end: the end time of activities

    :return: the end date for an activity
    :rtype: same container type as ``date``
    """

    # convert the start time and end time to be expressed in hours as [-12, 12)
    start_p = mg.to_periodic(start)
    end_p = mg.to_periodic(end)

    # in periodic time, negative values are before midnight and non-negative values are at or
    # after midnight. An activity rolls over to the next day exactly when it starts before
    # midnight (start_p < 0) and ends at or after midnight (end_p >= 0).
    # Testing the sign of start_p * end_p is not sufficient: it is also negative for an
    # activity that starts in the morning and ends in the afternoon of the same day
    # (start_p > 0, end_p < 0), and it is zero for an activity that ends exactly at midnight.
    rolls_over = (start_p < 0) & (end_p >= 0)

    # add one day where the activity rolls over, zero days elsewhere
    date_end = date + rolls_over * datetime.timedelta(days=1)

    return date_end
Ejemplo n.º 7
0
def residual_analysis(pred, obs, N=int(1e3 + 1), do_periodic=False):
    """
    Build a common x grid covering both the predicted and the observed values and evaluate \
    the residual of their cumulative distribution functions (CDFs) on it, via ``residual()``.

    :param numpy.ndarray pred: the predicted values
    :param numpy.ndarray obs: the observed values
    :param int N: the number of points of the CDF vector
    :param bool do_periodic: a flag to see if the time data should be in a [-12, 12) hour format

    :return: the x values, CDF of residual, inverted CDF of residual
    :rtype: numpy.ndarray, pandas.core.frame.DataFrame, pandas.core.frame.DataFrame
    """

    # pool the predicted and observed values into a single array
    pooled = np.array(pred.tolist() + obs.tolist())

    # put the data in [-12, 12) format instead of [0, 24) hour format
    if do_periodic:
        pooled = mg.to_periodic(pooled)
        pred = mg.to_periodic(np.array(pred))
        obs = mg.to_periodic(np.array(obs))

    # the grid extends past the smallest and largest pooled values by this margin
    margin = 15e-1
    x_lower = np.min(pooled) - margin
    x_upper = np.max(pooled) + margin

    # the x values at which the CDFs are evaluated
    x = np.linspace(x_lower, x_upper, num=N)

    # compute the residual
    cdf, inv_cdf = residual(pred=pred, obs=obs, x=x)

    return x, cdf, inv_cdf
Ejemplo n.º 8
0
    def should_be_asleep(self, t_start, t_end):
        """
        Decide whether the person should be asleep at the current clock time, for the \
        initialization of the ABM module.

        :param int t_start:  start time of sleep [minutes, time of day]
        :param int t_end: end time of sleep [minutes, time of day]

        :return: a flag indicating whether a Person should be asleep (if True) or awake (if False)
        :rtype: bool
        """

        # express the current time, the sleep start, and the sleep end in minutes within
        # [-12 * 60, 12 * 60) instead of [0, 24 * 60)
        now, sleep_start, sleep_end = (
            mg.to_periodic(t, do_hours=False)
            for t in (self.clock.time_of_day, t_start, t_end)
        )

        # asleep from the start of sleep (inclusive) until the end of sleep (exclusive)
        return (now >= sleep_start) and (now < sleep_end)
Ejemplo n.º 9
0
def get_stats(pid, data, do_periodic=False):
    """
    This function gets the statistics about an activity-parameter (start time, end time, \
    or duration) and stores the following data within a dataframe, one row per person:

    #. person identifier (PID)
    #. the number of events (N)
    #. the mean (mu)
    #. the standard deviation (std)
    #. the coefficient of variation (cv)

    When ``pid`` is empty, an empty dataframe with these five columns is returned.

    :param pid: the identifiers for the individuals within CHAD for a given activity
    :type pid: numpy.ndarray of str
    :param numpy.ndarray data: the CHAD records for a given activity
    :param bool do_periodic: a flag whether (if True) or not (if False) time of day \
    should be expressed in [-12, 12)

    :return: the statistical results from an activity-parameter (start time, end time, \
    or duration)
    :rtype: pandas.core.frame.DataFrame
    """

    # column names, in the order they appear in the result
    cols = ['PID', 'N', 'mu', 'std', 'cv']

    # one record per person
    records = list()

    # for each person within the data
    for p in np.unique(pid):

        # the records that belong to this person
        x = data[pid == p]

        if do_periodic:

            # display time in [-12, 12) instead of [0, 24)
            x = mg.to_periodic(x)

        # get the stats for the individual
        mu, std, cv, N = get_stats_individual(x)

        # store the information
        records.append({'PID': p, 'N': N, 'mu': mu, 'std': std, 'cv': cv})

    # passing the columns to the constructor (instead of selecting them afterwards) keeps the
    # column layout well defined even when there are no records at all
    df = pd.DataFrame(records, columns=cols)

    return df
Ejemplo n.º 10
0
def plot_histograms(d, keys):
    """
    Plot, for each requested activity, histograms of the start time, end time, and duration \
    found in the simulation diary.

    One 2 x 2 figure is created per activity; three of its four panels are used.

    :param diary.Diary d: the results of the simulation
    :param list keys: list of activities to graph
    :return:
    """

    # the number of bins of every histogram
    num_bins = 24

    # (panel position, diary column, color, legend label) for each histogram
    specs = (
        ((0, 0), 'start', 'blue', 'start'),
        ((0, 1), 'end', 'green', 'end'),
        ((1, 0), 'dt', 'red', 'duration'),
    )

    for k in keys:

        # the diary entries for this activity only
        df = d.df[d.df.act == k]

        # one figure per activity, titled with the activity name
        fig, axes = plt.subplots(2, 2)
        fig.suptitle(activity.INT_2_STR[k])

        for position, column, color, label in specs:

            ax = axes[position]
            values = getattr(df, column).values

            # the start time of sleep is shown in periodic time
            if column == 'start' and k == mg.KEY_SLEEP:
                values = mg.to_periodic(values)

            # draw the histogram, then label the x-axis and add the legend
            ax.hist(values, bins=num_bins, color=color, label=label)
            ax.set_xlabel('hours')
            ax.legend(loc='best')

    return
Ejemplo n.º 11
0
def plot_cdfs(d, keys):
    """
    Plot, for each requested activity, the empirical cumulative distribution function (CDF) of \
    the start time, end time, and duration found in the simulation diary.

    One 2 x 2 figure is created per activity; three of its four panels are used.

    :param diary.Diary d: the results of the simulation
    :param list keys: list of activities to graph
    :return:
    """

    # number of samples handed to the empirical CDF
    num_samples = 1e3

    # (panel position, diary column, color, legend label) for each curve
    specs = (
        ((0, 0), 'start', 'blue', 'start'),
        ((0, 1), 'end', 'green', 'end'),
        ((1, 0), 'dt', 'red', 'duration'),
    )

    for k in keys:

        # the diary entries for this activity only
        df = d.df[d.df.act == k]

        # one figure per activity, titled with the activity name
        fig, axes = plt.subplots(2, 2)
        fig.suptitle(activity.INT_2_STR[k])

        for position, column, color, label in specs:

            ax = axes[position]
            values = getattr(df, column).values

            # the start time of sleep is shown in periodic time
            if column == 'start' and k == mg.KEY_SLEEP:
                values = mg.to_periodic(values)

            # the empirical CDF of the values
            x, y = mg.get_ecdf(values, num_samples)

            # draw the curve, then label the x-axis and add the legend
            ax.plot(x, y, color=color, label=label)
            ax.set_xlabel('hours')
            ax.legend(loc='best')

    return
Ejemplo n.º 12
0
    def get_record_help(self, x, lower, upper, do_periodic):
        """
        Flag the entries of an activity-parameter within the CHAD data that lie inside the \
        closed interval [lower, upper].

        :param numpy.ndarray x: data for a given activity-parameter (i.e., duration, start time, or end time)
        :param float lower: the lower bound of acceptable values (inclusive)
        :param float upper: the upper bound of acceptable values (inclusive)
        :param bool do_periodic: a flag indicating whether (if True) or not (if False) to convert \
        time to a [-12, 12) format before comparing, for an activity that could occur over midnight.

        :return: boolean indices of acceptable values
        :rtype: numpy.ndarray of bool
        """

        # compare in [-12, 12) format when requested, otherwise compare the data as given
        values = mg.to_periodic(x) if do_periodic else x

        # True where the value is acceptable
        acceptable = (values >= lower) & (values <= upper)

        return np.array(acceptable)
Ejemplo n.º 13
0
def separate_activities_into_days(data):
    """
    This function finds the activities that occur over midnight and creates a new activity \
    diary in which an activity occurring over midnight is split into two activities: one \
    activity entry ending one minute before midnight, and one activity entry starting at midnight \
    on the following day.

    Start and end times in the returned diary are expressed in periodic time [-12, 12) hours. \
    A diary in which no activity occurs over midnight is returned without any split entries.

    :param pandas.core.frame.DataFrame data: the activity diary of an agent

    :return: the new activity diary, sorted by day and start time
    :rtype: pandas.core.frame.DataFrame
    """

    # one minute in hours
    one_min = 1 / temporal.HOUR_2_MIN

    # copy the data frame so that the caller's diary is not modified
    df = data.copy()

    # convert start time and end time to periodic time [-12, 12)
    df.start = mg.to_periodic(df.start, do_hours=True)
    df.end = mg.to_periodic(df.end, do_hours=True)

    # index for rollover of activities from one day to the next:
    # the activity starts before midnight and ends at or after midnight
    idx = (df.start.values < 0) & (df.end.values >= 0)

    # when an activity starts and ends on the same day
    df_same_day = df[~idx]

    # when an activity starts on one day and ends the next day
    df_next_day = df[idx]

    # the pieces of the new diary; the same-day activities are always present, so the
    # concatenation below never receives an empty list, even if nothing occurs over midnight
    pieces = [df_same_day]

    for _, x in df_next_day.iterrows():

        day, start, end, act, loc = np.array([x.day]), np.array([x.start]), np.array([x.end]), \
                                    np.array([x.act]), np.array([x['loc']])

        # the part before midnight: ends one minute before midnight on the original day.
        # duration = end - start + one minute, with end = -one_min
        d1 = {'day': day, 'start': start, 'end': [-one_min], 'dt': (-one_min - start + one_min),
              'act': act, 'loc': loc}

        # the part from midnight on: starts at midnight on the following day.
        # duration = end - start + one minute, with start = 0
        d2 = {
            'day': day + 1,
            'start': [0],
            'end': end,
            'dt': (end - 0 + one_min),
            'act': act,
            'loc': loc
        }

        pieces.append(pd.DataFrame(d1, columns=data.columns))
        pieces.append(pd.DataFrame(d2, columns=data.columns))

    # concatenate the same-day activities and the split activities
    df_new = pd.concat(pieces)

    # sort values
    df_new = df_new.sort_values(by=['day', 'start'])

    return df_new
Ejemplo n.º 14
0
    def get_current_meal(self, time_of_day):
        """
        This function finds the meal whose time window contains the given time of day.

        For each meal, the window is bounded below by the midpoint between the previous meal's \
        start time and the meal's own start time, and bounded above by the midpoint between the \
        meal's own start time and the next meal's start time. Midpoints that straddle midnight \
        are computed in periodic time and converted back to normal time.

        :param int time_of_day: the time of day (presumably in minutes, since it is compared \
        modulo temporal.DAY_2_MIN -- TODO confirm)
        :return: the first meal whose window contains the time of day, or None if there is no \
        such meal
        :rtype: meal.Meal
        """

        DAY_2_MIN = temporal.DAY_2_MIN

        # the number of meals
        N = self.num_meals

        # an array where the True values shows the index of the current meal
        idx = np.zeros(N, dtype=bool)

        # loop through each meal
        for q in range(N):

            # i: the index of the current meal (taken from the meal's id), j: the index of the
            # next meal, k: the index of the previous meal; both wrap around the list of meals.
            # assumes the meal ids are valid positions 0..N-1 within self.meals -- TODO confirm
            i = self.meals[q].id
            j = (i + 1) % N
            k = (i - 1) % N

            # store the time of day, and the start times of the current meal (ti), the next
            # meal (tj), and the previous meal (tk)
            t, ti, tj, tk = time_of_day, self.meals[i].t_start, self.meals[
                j].t_start, self.meals[k].t_start

            # meal time is from the start time until the midpoint between the two meals
            # ex: t_max(bf) = t_start(bf) + ( t_start(lunch) - t_start(bf) ) / 2
            # t_min(bf) = t_start(dinner) + ( t_start(bf) - t_start(dinner) ) /2

            # upper bound of the window (top): the next meal starts earlier in the day than the
            # current meal, so the midpoint has to be computed around midnight
            if (tj < ti):
                # change time to periodic time [-DAY_2_MIN / 2, DAY_2_MIN / 2)
                tj = mg.to_periodic(tj, do_hours=False)
                ti = mg.to_periodic(ti, do_hours=False)

                # take the average
                top = np.floor((ti + tj) / 2).astype(int)

                # convert to normal time [0, DAY_2_MIN)
                top = mg.from_periodic(top, do_hours=False)

            else:
                top = np.floor((ti + tj) / 2).astype(int)

            # lower bound of the window (bot): the previous meal starts later in the day than
            # the current meal, so the midpoint has to be computed around midnight.
            # NOTE(review): if the branch above was taken, ti has already been replaced by its
            # periodic value, so this comparison (and the conversion below) use the periodic ti
            # together with the non-periodic tk -- confirm that this is intended
            if (tk > ti):
                # change time to periodic time [-DAY_2_MIN / 2, DAY_2_MIN / 2)
                ti = mg.to_periodic(ti, do_hours=False)
                tk = mg.to_periodic(tk, do_hours=False)

                # take the average
                bot = np.floor((tk + ti) / 2).astype(int)

                # convert to normal time [0, DAY_2_MIN)
                bot = mg.from_periodic(bot, do_hours=False)
            else:
                bot = np.floor((tk + ti) / 2).astype(int)

            # the length of the window, the time elapsed since the lower bound, and the time
            # remaining until the upper bound, all wrapped into [0, DAY_2_MIN)
            dt_max = (top - bot) % DAY_2_MIN
            dt0 = (t - bot) % DAY_2_MIN
            dt1 = (top - t) % DAY_2_MIN

            # the time of day is within the window when it is no further than the window length
            # past the lower bound, and strictly before the upper bound
            idx[i] = (dt0 <= dt_max) and (dt1 < dt_max) and (dt1 > 0)

        # the current meal is the first meal that is flagged; None if no meal is flagged
        if idx.any():
            ii = np.where(idx == True)[0][0]
            the_meal = self.meals[ii]
        else:
            the_meal = None

        return the_meal