def plot_cdf_new(data_abm, data_chad, fid, title, xlabel, do_periodic=False):
    """
    Plot the cumulative distribution function (CDF) that compares the ABM data with the
    CHAD data for a given activity.

    :param numpy.ndarray data_abm: the ABM data for the activity
    :param numpy.ndarray data_chad: the CHAD data for the activity
    :param int fid: the figure identifier
    :param str title: the title of the figure
    :param str xlabel: the label of the x-axis
    :param bool do_periodic: if True, convert both data sets to a time scale that is
        [-12, 12) before plotting. This is useful for activities that may occur over
        midnight. If False, the data are plotted as given.

    :return: the figure of the CDF
    :rtype: matplotlib.figure.Figure
    """
    # get the figure that carries the requested identifier
    figure = plt.figure(num=fid)

    # optionally express the time data around midnight
    if do_periodic:
        abm_values, chad_values = mg.to_periodic(data_abm), mg.to_periodic(data_chad)
    else:
        abm_values, chad_values = data_abm, data_chad

    # nothing is drawn when the ABM data set is empty; the (blank) figure is still returned
    if data_abm.size != 0:
        plot_cdf(abm_values, chad_values, xlabel=xlabel, title=title)

    return figure
def is_workday(self, p):
    """
    Indicate whether or not the sleep event resembles that of a person sleeping for a workday.

    :param p: the person of interest; ``p.socio.job`` supplies the employment flag
        (``is_employed``) and the working days (``work_days``)
    :return: True, if the sleep event resembles a workday. False, otherwise.
    :rtype: bool
    """
    job = p.socio.job

    # by default (person is not employed) the sleep is assumed to reflect a workday
    if not job.is_employed:
        return True

    # the time of day centered around midnight [-12 * HOUR_2_MIN, 12 * HOUR_2_MIN)
    t_mid = mg.to_periodic(self.clock.time_of_day, do_hours=False)

    # the day of the week for today and for tomorrow
    today = self.clock.day_of_week
    tomorrow = (today + 1 + 7) % 7

    # going to sleep before midnight means waking up tomorrow;
    # going to sleep at or after midnight means waking up today
    wake_day = tomorrow if (t_mid < 0) else today

    # the sleep resembles a workday exactly when the wake-up day is a work day
    return wake_day in job.work_days
def plot_activity_cdfs(d, keys):
    """
    Plot the empirical cumulative distribution function (CDF) of start time, end time,
    and duration for each requested activity in the simulation. One 2 x 2 figure is
    created per activity; three of the four subplots are used.

    :param diary.Diary d: the results of the simulation
    :param list keys: list of activities to graph

    :return:
    """
    # number of samples for the empirical CDF (the same for every activity)
    n_samples = 1e3

    for key in keys:
        # the diary entries of the given activity
        records = d.df[d.df.act == key]

        # one figure per activity, titled with the activity name
        fig, axes = plt.subplots(2, 2)
        fig.suptitle(activity.INT_2_STR[key])

        # (axis, data, color, label) for each subplot
        panels = (
            (axes[0, 0], records.start.values, 'blue', 'start'),
            (axes[0, 1], records.end.values, 'green', 'end'),
            (axes[1, 0], records.dt.values, 'red', 'duration'),
        )

        for ax, values, color, label in panels:
            # the start time of sleep can span over midnight, so use periodic time
            if label == 'start' and key == mg.KEY_SLEEP:
                values = mg.to_periodic(values, do_hours=True)

            # the empirical CDF of the data
            x, y = mg.get_ecdf(values, n_samples)

            ax.plot(x, y, color=color, label=label)
            ax.set_xlabel('hours')
            ax.legend(loc='best')

    return
def analyze_sleep(data):
    """
    Analyze the CHAD data for sleeping. The raw sleep events are merged across midnight,
    passed through the periodicity assumption, filtered to the acceptable ranges of
    start time, end time, and duration, and then summarized for use in ABMHAP.

    :param chad.CHAD_RAW data: the raw CHAD data

    :return: the statistical data on CHAD sleep data
    :rtype: dictionary
    """
    # the CHAD events data
    events = data.events

    # the raw sleep records
    print('loading sleep data...')
    raw = data.activity_times(events, chad_code.SLEEP)

    # join events that continue from one day into the next (events occurring over midnight)
    print('calculating merged data...')
    merged = merge(raw)

    # apply the periodicity assumption
    print('calculating the periodicity assumption...')
    period = periodicity_sleep(merged)

    # acceptable start times, compared in periodic time
    start_ok = (mg.to_periodic(period.start) >= chad.SLEEP_START_MIN) \
        & (mg.to_periodic(period.start) <= chad.SLEEP_START_MAX)

    # acceptable end times
    end_ok = (period.end >= chad.SLEEP_END_MIN) & (period.end <= chad.SLEEP_END_MAX)

    # acceptable durations
    dt_ok = (period.dt >= chad.SLEEP_DT_MIN) & (period.dt <= chad.SLEEP_DT_MAX)

    print('calculating the moments...')

    # keep only the events that pass every filter
    sleep = period[start_ok & end_ok & dt_ok]

    # the statistics of the remaining sleep events
    return get_moments(sleep, start_periodic=True)
def plot_activity_histograms(d, keys):
    """
    Plot the histograms of start time, end time, and duration for each requested
    activity in the simulation. One 2 x 2 figure is created per activity; three of the
    four subplots are used.

    :param diary.Diary d: the results of the simulation
    :param list keys: list of activities to graph

    :return:
    """
    # the number of bins (24, to reflect the 24 hours in a day)
    num_bins = 24

    for key in keys:
        # the diary entries of the given activity
        records = d.df[d.df.act == key]

        # one figure per activity, titled with the activity name
        fig, axes = plt.subplots(2, 2)
        fig.suptitle(activity.INT_2_STR[key])

        # (axis, data, color, label) for each subplot
        panels = (
            (axes[0, 0], records.start.values, 'blue', 'start'),
            (axes[0, 1], records.end.values, 'green', 'end'),
            (axes[1, 0], records.dt.values, 'red', 'duration'),
        )

        for ax, values, color, label in panels:
            # the start time of sleep can span over midnight, so use periodic time
            if label == 'start' and key == mg.KEY_SLEEP:
                values = mg.to_periodic(values, do_hours=True)

            ax.hist(values, bins=num_bins, color=color, label=label)
            ax.set_xlabel('hours')
            ax.legend(loc='best')

    return
def get_end_date(date, start, end):
    """
    Find the date on which each activity ends.

    An activity ends on the day after it starts when, in periodic time, it starts before
    midnight (negative start) and ends at or after midnight (non-negative end). Every
    other activity ends on the date it starts.

    :param date: the date the activities start
    :type: numpy.ndarray of datetime.timedelta
    :param numpy.ndarray start: the start time of the activities
    :param numpy.ndarray end: the end time of activities

    :return: the end date for an activity
    :rtype: numpy.ndarray of datetime.timedelta
    """
    # express the start time and end time in hours as [-12, 12)
    start_p = mg.to_periodic(start)
    end_p = mg.to_periodic(end)

    # The event started before midnight and ended at or after midnight (the next day).
    # Testing the two signs explicitly (rather than the sign of the product) avoids
    # flagging an event that starts after midnight and ends after noon on the same day
    # (positive start, negative end), and it includes an event that ends exactly at
    # midnight (end of zero).
    rolls_over = (start_p < 0) & (end_p >= 0)

    # add one day to the start date wherever the activity rolls over midnight
    date_end = date + rolls_over * datetime.timedelta(days=1)

    return date_end
def residual_analysis(pred, obs, N=int(1e3 + 1), do_periodic=False):
    """
    Take the predicted and observed values and compute the respective cumulative
    distribution functions (CDFs) in units of percentage and the inverted CDF, which is
    the CDF in units of minutes. The CDFs are evaluated on a common, evenly spaced grid
    that spans both data sets plus a fixed margin on each side.

    :param numpy.ndarray pred: the predicted values
    :param numpy.ndarray obs: the observed values
    :param int N: the number of points of the CDF vector
    :param bool do_periodic: a flag to see if the time data should be in a [-12, 12) hour format

    :return: the x values, CDF of residual, inverted CDF of residual
    :rtype: numpy.ndarray, pandas.core.frame.DataFrame, pandas.core.frame.DataFrame
    """
    # the margin added below the minimum and above the maximum of the data
    margin = 15e-1

    # pool the predicted and the observed values to find the common range
    pooled = np.array(pred.tolist() + obs.tolist())

    # put the data in [-12, 12) format instead of [0, 24) hour format
    if do_periodic:
        pooled = mg.to_periodic(pooled)
        pred = mg.to_periodic(np.array(pred))
        obs = mg.to_periodic(np.array(obs))

    # the lower and upper bounds of the grid
    lower = np.min(pooled) - margin
    upper = np.max(pooled) + margin

    # the x values at which the CDFs are evaluated
    x = np.linspace(lower, upper, num=N)

    # compute the residual
    cdf, inv_cdf = residual(pred=pred, obs=obs, x=x)

    return x, cdf, inv_cdf
def should_be_asleep(self, t_start, t_end):
    """
    Find out if the person should be asleep for the initialization of the ABM module.
    The current time of day and the sleep start / end times are compared in periodic
    time, [-12 * 60, 12 * 60) minutes instead of [0, 24 * 60).

    :param int t_start: start time of sleep [minutes, time of day]
    :param int t_end: end time of sleep [minutes, time of day]

    :return: a flag indicating whether a Person should be asleep (if True) or awake (if False)
    :rtype: bool
    """
    # the times are in minutes, not hours
    in_minutes = {'do_hours': False}

    # the current time, the sleep start time, and the sleep end time in periodic time
    now = mg.to_periodic(self.clock.time_of_day, **in_minutes)
    sleep_start = mg.to_periodic(t_start, **in_minutes)
    sleep_end = mg.to_periodic(t_end, **in_minutes)

    # asleep from the start time (inclusive) up to the end time (exclusive)
    return (now >= sleep_start) and (now < sleep_end)
def get_stats(pid, data, do_periodic=False):
    """
    Get the statistics about an activity-parameter (start time, end time, or duration)
    for each individual and store the following data within a dataframe:

    #. person identifier (PID)
    #. the number of events (N)
    #. the mean (mu)
    #. the standard deviation (std)
    #. the coefficient of variation (cv)

    When ``pid`` is empty, an empty dataframe with these five columns is returned.

    :param pid: the identifiers for the individuals within CHAD for a given activity
    :type pid: numpy.ndarray of str
    :param numpy.ndarray data: the CHAD records for a given activity
    :param bool do_periodic: a flag whether (if True) or not (if False) time of day
        should be expressed in [-12, 12)

    :return: the statistical results from an activity-parameter (start time, end time,
        or duration)
    :rtype: pandas.core.frame.DataFrame
    """
    # column names, in the order they appear in the result
    cols = ['PID', 'N', 'mu', 'std', 'cv']

    # one record per person
    rows = []

    for p in np.unique(pid):
        # the data that belong to this person
        x = data[pid == p]

        if do_periodic:
            # display time in [-12, 12) instead of [0, 24)
            x = mg.to_periodic(x)

        # the stats for the individual
        mu, std, cv, N = get_stats_individual(x)

        rows.append({'PID': p, 'N': N, 'mu': mu, 'std': std, 'cv': cv})

    # passing the columns to the constructor (instead of indexing afterwards) keeps the
    # column order and also works when there are no records at all
    return pd.DataFrame(rows, columns=cols)
def plot_histograms(d, keys):
    """
    Plot the histograms of start time, end time, and duration for each requested
    activity in the simulation. One 2 x 2 figure is created per activity; three of the
    four subplots are used.

    :param diary.Diary d: the results of the simulation
    :param list keys: list of activities to graph

    :return:
    """
    # the number of bins in every histogram
    num_bins = 24

    def draw(ax, values, color, label):
        # draw one histogram with its x-axis label and legend
        ax.hist(values, bins=num_bins, color=color, label=label)
        ax.set_xlabel('hours')
        ax.legend(loc='best')

    for key in keys:
        # the diary entries of the given activity
        records = d.df[d.df.act == key]

        # one figure per activity, titled with the activity name
        fig, axes = plt.subplots(2, 2)
        fig.suptitle(activity.INT_2_STR[key])

        # the start time distribution (in periodic time for sleep)
        start_values = records.start.values
        if key == mg.KEY_SLEEP:
            start_values = mg.to_periodic(start_values)
        draw(axes[0, 0], start_values, 'blue', 'start')

        # the end time distribution
        draw(axes[0, 1], records.end.values, 'green', 'end')

        # the duration distribution
        draw(axes[1, 0], records.dt.values, 'red', 'duration')

    return
def plot_cdfs(d, keys):
    """
    Plot the empirical cumulative distribution function (CDF) of start time, end time,
    and duration for each requested activity in the simulation. One 2 x 2 figure is
    created per activity; three of the four subplots are used.

    :param diary.Diary d: the results of the simulation
    :param list keys: list of activities to graph

    :return:
    """
    # number of samples for the empirical CDF
    n_samples = 1e3

    def draw(ax, values, color, label):
        # draw one empirical CDF with its x-axis label and legend
        x, y = mg.get_ecdf(values, n_samples)
        ax.plot(x, y, color=color, label=label)
        ax.set_xlabel('hours')
        ax.legend(loc='best')

    for key in keys:
        # the diary entries of the given activity
        records = d.df[d.df.act == key]

        # one figure per activity, titled with the activity name
        fig, axes = plt.subplots(2, 2)
        fig.suptitle(activity.INT_2_STR[key])

        # the start time distribution (in periodic time for sleep)
        start_values = records.start.values
        if key == mg.KEY_SLEEP:
            start_values = mg.to_periodic(start_values)
        draw(axes[0, 0], start_values, 'blue', 'start')

        # the end time distribution
        draw(axes[0, 1], records.end.values, 'green', 'end')

        # the duration distribution
        draw(axes[1, 0], records.dt.values, 'red', 'duration')

    return
def get_record_help(self, x, lower, upper, do_periodic):
    """
    Find the boolean indices of the acceptable entries of an activity-parameter within
    the CHAD data. An entry is acceptable when it lies within [lower, upper], both
    bounds included.

    :param numpy.ndarray x: data for a given activity-parameter (i.e., duration, start time, or end time)
    :param float lower: the lower bound of acceptable values
    :param float upper: the upper bound of acceptable values
    :param bool do_periodic: a flag indicating whether (if True) or not (if False) to
        convert time to a [-12, 12) format due to an activity that could occur over midnight.

    :return: boolean indices of acceptable values
    :rtype: numpy.ndarray of bool
    """
    # convert time to a [-12, 12) format when requested
    values = mg.to_periodic(x) if do_periodic else x

    # entries that are not below the lower bound / not above the upper bound
    not_too_low = values >= lower
    not_too_high = values <= upper

    return np.array(not_too_low & not_too_high)
def separate_activities_into_days(data):
    """
    Create a new activity diary in which every activity occurring over midnight is
    split into two entries: one entry ending one minute before midnight, and one entry
    starting at midnight on the following day. Activities that start and end on the
    same day are kept as they are. The diary is returned sorted by day and start time.

    The start and end times of the returned diary are expressed in periodic time,
    [-12, 12) hours. When no activity occurs over midnight, the result contains just
    the same-day activities.

    :param pandas.core.frame.DataFrame data: the activity diary of an agent

    :return: the new activity diary
    :rtype: pandas.core.frame.DataFrame
    """
    # one minute in hours
    one_min = 1 / temporal.HOUR_2_MIN

    # work on a copy so that the caller's diary is not modified
    df = data.copy()

    # convert the start time and the end time to periodic time [-12, 12)
    df.start = mg.to_periodic(df.start, do_hours=True)
    df.end = mg.to_periodic(df.end, do_hours=True)

    # an activity rolls over from one day to the next when it starts before midnight
    # and ends at or after midnight
    idx = (df.start.values < 0) & (df.end.values >= 0)

    # start with the activities that start and end on the same day; the pieces of the
    # split activities are appended below. Collecting everything in one list means the
    # final concatenation never receives an empty list, even when nothing rolls over.
    frames = [df[~idx]]

    for _, row in df[idx].iterrows():
        day, start, end = np.array([row['day']]), np.array([row['start']]), np.array([row['end']])
        act, loc = np.array([row['act']]), np.array([row['loc']])

        # the part before midnight: it ends one minute before midnight;
        # duration = end - start + one minute
        before = {
            'day': day, 'start': start, 'end': [-one_min],
            'dt': (-one_min - start + one_min), 'act': act, 'loc': loc,
        }

        # the part from midnight on, on the following day;
        # duration = end - start + one minute
        after = {
            'day': day + 1, 'start': [0], 'end': end,
            'dt': (end - 0 + one_min), 'act': act, 'loc': loc,
        }

        frames.append(pd.DataFrame(before, columns=data.columns))
        frames.append(pd.DataFrame(after, columns=data.columns))

    # join the same-day activities with the split activities, ordered in time
    df_new = pd.concat(frames).sort_values(by=['day', 'start'])

    return df_new
def get_current_meal(self, time_of_day):
    """
    Get the meal whose time window contains the time of day.

    The window of a meal lies between two midpoints: the midpoint between the start
    time of the previous meal and the start time of this meal (lower bound), and the
    midpoint between the start time of this meal and the start time of the next meal
    (upper bound). For example::

        t_max(bf) = t_start(bf) + ( t_start(lunch) - t_start(bf) ) / 2
        t_min(bf) = t_start(dinner) + ( t_start(bf) - t_start(dinner) ) / 2

    The time of day must lie strictly between the two bounds; a time of day that falls
    exactly on a bound does not select the meal.

    :param int time_of_day: the time of day

    :return: the meal, or None if the time of day is within no meal's window
    :rtype: meal.Meal
    """
    DAY_2_MIN = temporal.DAY_2_MIN

    # the number of meals
    N = self.num_meals

    # an array where the True values show the index of the current meal
    idx = np.zeros(N, dtype=bool)

    t = time_of_day

    # loop through each meal
    for q in range(N):

        # the index of this meal (i), the next meal (j), and the previous meal (k)
        i = self.meals[q].id
        j = (i + 1) % N
        k = (i - 1) % N

        # the start time of this meal, the next meal, and the previous meal
        ti, tj, tk = self.meals[i].t_start, self.meals[j].t_start, self.meals[k].t_start

        # upper bound: the midpoint between this meal and the next meal
        if tj < ti:
            # The next meal is on the following day, so do the math around zero: change
            # the times to periodic time [-DAY_2_MIN / 2, DAY_2_MIN / 2). The converted
            # values are kept in their own variables so that the lower bound below
            # still sees the original start time of this meal.
            ti_p = mg.to_periodic(ti, do_hours=False)
            tj_p = mg.to_periodic(tj, do_hours=False)

            # take the average and convert back to normal time [0, DAY_2_MIN)
            top = np.floor((ti_p + tj_p) / 2).astype(int)
            top = mg.from_periodic(top, do_hours=False)
        else:
            top = np.floor((ti + tj) / 2).astype(int)

        # lower bound: the midpoint between the previous meal and this meal
        if tk > ti:
            # the previous meal is on the preceding day, so do the math around zero
            ti_p = mg.to_periodic(ti, do_hours=False)
            tk_p = mg.to_periodic(tk, do_hours=False)

            # take the average and convert back to normal time [0, DAY_2_MIN)
            bot = np.floor((tk_p + ti_p) / 2).astype(int)
            bot = mg.from_periodic(bot, do_hours=False)
        else:
            bot = np.floor((tk + ti) / 2).astype(int)

        # the length of the window, the time elapsed since the lower bound, and the
        # time left until the upper bound (all wrapped around the day)
        dt_max = (top - bot) % DAY_2_MIN
        dt0 = (t - bot) % DAY_2_MIN
        dt1 = (top - t) % DAY_2_MIN

        idx[i] = (dt0 <= dt_max) and (dt1 < dt_max) and (dt1 > 0)

    # the meal with the lowest index whose window contains the time of day, if any
    if idx.any():
        the_meal = self.meals[np.flatnonzero(idx)[0]]
    else:
        the_meal = None

    return the_meal