def transform(self, df_activities):
    """
    Label the stored index with activities and encode the labels.

    Parameters
    ----------
    df_activities : pd.DataFrame
        Activities that are handed to ``label_data`` together with
        ``self.df_index`` and ``self.idle``.

    Returns
    -------
    pd.DataFrame
        A single column ``'activity'`` holding the encoded labels. The
        index is the index of the labeled rows that remain after the first
        ``self.window_size`` rows have been dropped.
    """
    labeled = label_data(self.df_index, df_activities, self.idle)

    # skip the first window_size rows (presumably the rows that precede
    # the first complete window -- confirm against the caller)
    labeled = labeled.iloc[self.window_size:]

    codes = self._lbl_enc.transform(labeled['activity'].values)
    return pd.DataFrame(data=codes, index=labeled.index, columns=['activity'])
def transform(self, X):
    """
    Encode activity labels with the already fitted label encoder.

    Parameters
    ----------
    X : pd.DataFrame or np.ndarray
        Either a dataframe containing the ``TIME`` column, which is labeled
        with the stored activities (``self.df_acts_``) before the labels
        are encoded, or an array of labels that is passed to the label
        encoder unchanged.

    Returns
    -------
    pd.DataFrame or array-like
        For dataframe input a dataframe with the columns ``TIME`` and
        ``ACTIVITY`` (encoded labels). For array input whatever
        ``self.lbl_enc.transform`` returns.

    Raises
    ------
    ValueError
        If ``X`` is neither a dataframe with a ``TIME`` column nor an
        ``np.ndarray``.
    """
    if isinstance(X, pd.DataFrame) and TIME in X.columns:
        df = label_data(X, self.df_acts_, self.idle)
        encoded_labels = self.lbl_enc.transform(df[ACTIVITY].values)
        return pd.DataFrame(data={
            TIME: df[TIME].values,
            ACTIVITY: encoded_labels,
        })

    # return only the labels for a nd array
    if isinstance(X, np.ndarray):
        return self.lbl_enc.transform(X)

    # tell the caller what was expected instead of raising an empty error
    raise ValueError(
        'X must be a pd.DataFrame containing the time column or a '
        'np.ndarray; got %s' % type(X).__name__
    )
def fit(self, df_activities, y=None):
    """
    Label the device data and fit the numeric label representation.

    Parameters
    ----------
    df_activities : pd.DataFrame
        Columns are end_time, start_time, activity.
    y : ignored
        Not used by this method; presumably present for compatibility
        with the scikit-learn estimator signature.

    Returns
    -------
    self
        The fitted instance, so that calls can be chained
        (``enc.fit(df).transform(df)``).
    """
    df = label_data(self.df_devices, df_activities, self.idle)
    self._lbl_enc.fit(df[ACTIVITY].values)
    # the original returned None; returning self follows the estimator
    # convention and stays compatible with callers ignoring the result
    return self
def transform(self, x):
    """
    Encode activity labels with the already fitted label encoder.

    Parameters
    ----------
    x : pd.DataFrame or np.ndarray
        Either an activity dataframe whose columns are exactly
        ``'start_time'``, ``'end_time'`` and ``ACTIVITY``, which is used
        to label ``self.df_devices`` before the labels are encoded, or an
        array of labels that is passed to the label encoder unchanged.

    Returns
    -------
    pd.DataFrame or array-like
        For dataframe input a dataframe with the single column
        ``ACTIVITY`` (encoded labels), indexed like the labeled data. For
        array input whatever ``self._lbl_enc.transform`` returns.

    Raises
    ------
    ValueError
        If ``x`` is neither an activity dataframe with the expected
        columns nor an ``np.ndarray``.
    """
    # return only the labels for a nd array
    if isinstance(x, np.ndarray):
        return self._lbl_enc.transform(x)

    # if the input is a dataframe of activities, then label the devices
    # and transform the resulting labels accordingly
    if isinstance(x, pd.DataFrame) \
            and set(x.columns) == {'start_time', 'end_time', ACTIVITY}:
        df = label_data(self.df_devices, x, self.idle)
        encoded_labels = self._lbl_enc.transform(df[ACTIVITY].values)
        return pd.DataFrame(index=df.index, data=encoded_labels,
                            columns=[ACTIVITY])

    # tell the caller what was expected instead of raising an empty error
    raise ValueError(
        'x must be an activity pd.DataFrame (start_time, end_time and the '
        'activity column) or a np.ndarray; got %s' % type(x).__name__
    )
def contingency_table_triggers(df_devs, df_acts, idle=False):
    """
    Count the rows of the labeled device data per device and activity.

    Read the result like this: dev 1 was triggered 123 times while act 1
    was present.

    Example
    -------
    ::

               | act 1 | .... | act n |
        -------------------------------
        dev 1  |  123  |      |  123  |
        ...
        dev n  |  123  |      |  123  |

    Parameters
    ----------
    df_devs : pd.DataFrame
        Device data, passed to ``label_data``.
    df_acts : pd.DataFrame
        Activity data, passed to ``label_data``.
    idle : bool
        Forwarded to ``label_data`` as ``idle``.

    Returns
    -------
    pd.DataFrame
        Devices as rows, activities as columns; missing combinations are
        filled with 0.
    """
    labeled = label_data(df_devs, df_acts, idle=idle)

    # helper column so that there is something to aggregate; the
    # aggregation itself only counts rows
    labeled['val'] = 1

    pivot_kwargs = {
        'values': 'val',
        'index': 'device',
        'columns': 'activity',
        'aggfunc': len,
        'fill_value': 0,
    }
    return pd.pivot_table(labeled, **pivot_kwargs)
def fit_transform(self, df_acts, X):
    """
    Fit the label encoder and return the encoded labels.

    Parameters
    ----------
    df_acts : pd.DataFrame
        Recorded activities from a dataset. For more information refer to
        the :ref:`user guide<activity_dataframe>`.
    X : pd.DataFrame
        Data that is labeled with the fitted activities by ``label_data``.

    Returns
    -------
    df : pd.DataFrame
        Columns ``TIME`` (taken from the labeled data) and ``ACTIVITY``
        (the encoded labels).
    """
    # NOTE(review): fit is expected to provide self.df_acts_, which is
    # read right below -- confirm against the fit implementation
    self.fit(df_acts)

    labeled = label_data(X, self.df_acts_, self.idle)
    codes = self.lbl_enc.fit_transform(labeled[ACTIVITY].values)
    timestamps = labeled[TIME].values

    return pd.DataFrame(data={TIME: timestamps, ACTIVITY: codes})
def contingency_table_triggers(df_devs, df_acts, idle=False):
    """
    Compute the number of device triggers occurring during the different
    activities.

    Parameters
    ----------
    df_devs : pd.DataFrame
        All recorded devices from a dataset. For more information refer to
        :ref:`user guide<device_dataframe>`.
    df_acts : pd.DataFrame
        All recorded activities from a dataset. For more information refer
        to the :ref:`user guide<activity_dataframe>`.
    idle : bool
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.

    Examples
    --------
    >>> from pyadlml.stats import contingency_triggers
    >>> contingency_triggers(data.df_devices, data.df_activities)
    activity        get drink  go to bed  ...  use toilet
    device
    Cups cupboard          36          0  ...           0
    Dishwasher              2          0  ...           0
    ...                   ...        ...  ...         ...
    Washingmachine          0          0  ...           0
    [7 rows x 7 columns]

    Returns
    -------
    df : pd.DataFrame
        Devices as rows, activities as columns; combinations that never
        occur are filled with 0.
    """
    labeled = label_data(df_devs, df_acts, idle=idle)

    # helper column so that there is something to aggregate; the
    # aggregation itself only counts rows
    labeled[VAL] = 1

    return pd.pivot_table(
        labeled,
        values=VAL,
        index=DEVICE,
        columns=ACTIVITY,
        aggfunc=len,
        fill_value=0,
    )
def contingency_table_triggers_01(df_devs, df_acts, idle=False):
    """
    Count the rows of the labeled device data per (device, value) pair and
    activity.

    Read the result like this: dev 1 was recorded 123 times with value 0
    while act 1 was present.

    Example
    -------
    ::

                 | act 1 | .... | act n |
        ---------------------------------
        dev 1 0  |  123  |      |  123  |
        dev 1 1  |  122  |      |  141  |
        ...
        dev n 0  |  123  |      |  123  |
        dev n 1  |  122  |      |  141  |

    Parameters
    ----------
    df_devs : pd.DataFrame
        Device data, passed to ``label_data``.
    df_acts : pd.DataFrame
        Activity data, passed to ``label_data``.
    idle : bool
        Forwarded to ``label_data`` as ``idle``.

    Returns
    -------
    pd.DataFrame
        Rows are indexed by device and value, columns are the activities;
        missing combinations are filled with 0.
    """
    labeled = label_data(df_devs, df_acts, idle=idle)

    # 'val' becomes part of the row index, so a separate integer copy is
    # used as the column that is aggregated
    labeled['val2'] = labeled['val'].astype(int)

    row_keys = ['device', 'val']
    return pd.pivot_table(
        labeled,
        values='val2',
        index=row_keys,
        columns='activity',
        aggfunc=len,
        fill_value=0,
    )
def fit(self, df_activities):
    """
    Fit the label encoder on the activities assigned to the stored index.

    Parameters
    ----------
    df_activities : pd.DataFrame
        Activities that are handed to ``label_data`` together with
        ``self.df_index`` and ``self.idle``.
    """
    labeled = label_data(self.df_index, df_activities, self.idle)

    # start where the labeling begins: the first window_size rows are
    # left out of the fit
    remaining = labeled.iloc[self.window_size:]

    self._lbl_enc.fit(remaining['activity'].values)
def transform(self, df_activities):
    """
    Label the stored device data with activities and encode the labels.

    Parameters
    ----------
    df_activities : pd.DataFrame
        Activities that are handed to ``label_data`` together with
        ``self.df_devices`` and ``self.idle``.

    Returns
    -------
    pd.DataFrame
        A single column ``'activity'`` holding the encoded labels, indexed
        like the labeled data.
    """
    labeled = label_data(self.df_devices, df_activities, self.idle)
    raw_labels = labeled['activity'].values
    codes = self._lbl_enc.transform(raw_labels)
    return pd.DataFrame(data=codes, index=labeled.index, columns=['activity'])
def fit(self, df_activities):
    """
    Fit the label encoder on the activities assigned to the device data.

    Parameters
    ----------
    df_activities : pd.DataFrame
        Activities that are handed to ``label_data`` together with
        ``self.df_devices`` and ``self.idle``.
    """
    labeled = label_data(self.df_devices, df_activities, self.idle)
    raw_labels = labeled['activity'].values
    self._lbl_enc.fit(raw_labels)
def contingency_table_triggers_01(df_devs, df_acts, idle=False):
    """
    Compute how often a device turns "on" or "off" respectively during
    the different activities.

    Parameters
    ----------
    df_devs : pd.DataFrame
        All recorded devices from a dataset. For more information refer to
        :ref:`user guide<device_dataframe>`.
    df_acts : pd.DataFrame
        All recorded activities from a dataset. For more information refer
        to the :ref:`user guide<activity_dataframe>`.
    idle : bool, optional, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.

    Examples
    --------
    >>> from pyadlml.stats import contingency_triggers_01
    >>> contingency_triggers_01(data.df_devices, data.df_activities)
    activity             get drink  go to bed  ...  use toilet
    devices                                    ...
    Cups cupboard Off           18          0  ...           0
    Cups cupboard On            18          0  ...           0
    Dishwasher Off               1          0  ...           0
    Dishwasher On                1          0  ...           0
    ...                        ...        ...  ...         ...
    Washingmachine Off           0          0  ...           0
    Washingmachine On            0          0  ...           0
    [14 rows x 7 columns]

    Returns
    -------
    df : pd.DataFrame
        One row per device/state combination, indexed by the string
        ``"<device> On"`` or ``"<device> Off"`` (index name ``devices``),
        and one column per activity. Missing combinations are 0.
    """
    dev_index = 'devices'
    ON = 'On'
    OFF = 'Off'

    df = label_data(df_devs, df_acts, idle=idle)

    # the state column becomes part of the row index, so a separate
    # integer copy serves as the column that is counted
    df['val2'] = df[VAL].astype(int)
    df = pd.pivot_table(df,
                        columns=ACTIVITY,
                        index=[DEVICE, VAL],
                        values='val2',
                        aggfunc=len,
                        fill_value=0)

    df = df.reset_index()

    # Derive the label from the state value itself. Searching the
    # concatenated "<device><state>" text for "False" mislabels devices
    # whose name contains that word and breaks for non-bool states.
    state = df[VAL].astype(bool).map({True: ON, False: OFF})
    df[dev_index] = df[DEVICE].astype(str) + ' ' + state

    df = df.set_index(dev_index)
    df = df.drop([DEVICE, VAL], axis=1)
    return df