예제 #1
0
 def transform(self, df_activities):
     """Encode the activity labels of the labelled index data.

     Parameters
     ----------
     df_activities :
         Activity recordings handed to ``label_data`` together with
         ``self.df_index`` (presumably a pd.DataFrame -- confirm).

     Returns
     -------
     pd.DataFrame
         One ``'activity'`` column with the encoded labels, indexed like
         the labelled frame after its first ``window_size`` rows were
         dropped.
     """
     labelled = label_data(self.df_index, df_activities, self.idle)
     # the first `window_size` rows are skipped before encoding
     trimmed = labelled.iloc[self.window_size:, :]
     codes = self._lbl_enc.transform(trimmed['activity'].values)
     return pd.DataFrame(data=codes,
                         index=trimmed.index,
                         columns=['activity'])
예제 #2
0
    def transform(self, X):
        """
        Encode activity labels for a dataframe or for a raw label array.

        Parameters
        ----------
        X : pd.DataFrame or np.ndarray
            Either a dataframe that contains the ``TIME`` column, which is
            labelled against ``self.df_acts_`` through ``label_data``, or an
            array of labels that is passed directly to the label encoder.

        Returns
        -------
        pd.DataFrame or array-like
            For a dataframe input, a dataframe with the columns ``TIME`` and
            ``ACTIVITY`` where ``ACTIVITY`` holds the encoded labels. For an
            array input, whatever ``self.lbl_enc.transform`` returns.

        Raises
        ------
        ValueError
            If ``X`` is neither a dataframe containing the ``TIME`` column
            nor a numpy array.
        """
        if isinstance(X, pd.DataFrame) and TIME in X.columns:
            df = label_data(X, self.df_acts_, self.idle)
            encoded_labels = self.lbl_enc.transform(df[ACTIVITY].values)
            return pd.DataFrame(data={
                TIME: df[TIME].values,
                ACTIVITY: encoded_labels
            })

        # return only the labels for a nd array
        if isinstance(X, np.ndarray):
            return self.lbl_enc.transform(X)

        raise ValueError(
            "X must be a pandas DataFrame containing the TIME column or a "
            "numpy ndarray of labels, got {}".format(type(X).__name__)
        )
예제 #3
0
 def fit(self, df_activities, y=None):
     """Label the device data and fit the label encoder on the activities.

     Parameters
     ----------
     df_activities : pd.DataFrame
         Columns are end_time, start_time, activity.
     y : ignored
         Not used by this method.
     """
     labelled = label_data(self.df_devices, df_activities, self.idle)
     activities = labelled[ACTIVITY].values
     self._lbl_enc.fit(activities)
예제 #4
0
 def transform(self, x):
     """Encode activity labels for an activity dataframe or a label array.

     Parameters
     ----------
     x : pd.DataFrame or np.ndarray
         Either a dataframe whose columns are exactly ``start_time``,
         ``end_time`` and ``ACTIVITY``, which is used to label
         ``self.df_devices`` through ``label_data``, or an array of labels
         that is passed directly to the label encoder.

     Returns
     -------
     pd.DataFrame or array-like
         For a dataframe input, a dataframe with the single column
         ``ACTIVITY`` holding the encoded labels, indexed like the labelled
         data. For an array input, whatever ``self._lbl_enc.transform``
         returns.

     Raises
     ------
     ValueError
         If ``x`` is neither an activity dataframe with the expected
         columns nor a numpy array.
     """
     # return only the labels for a nd array
     if isinstance(x, np.ndarray):
         return self._lbl_enc.transform(x)

     # a dataframe of activities is used to label the device data first
     if (isinstance(x, pd.DataFrame)
             and set(x.columns) == {'start_time', 'end_time', ACTIVITY}):
         df = label_data(self.df_devices, x, self.idle)
         encoded_labels = self._lbl_enc.transform(df[ACTIVITY].values)
         return pd.DataFrame(index=df.index, data=encoded_labels, columns=[ACTIVITY])

     raise ValueError(
         "x must be an activity DataFrame with the columns start_time, "
         "end_time and the activity column, or a numpy ndarray of labels, "
         "got {}".format(type(x).__name__)
     )
예제 #5
0
def contingency_table_triggers(df_devs, df_acts, idle=False):
    """
    Count the rows of the labelled device data per device and activity.

    output:
        read like this: dev 1 was 123 times triggered while act 1 was present
        example
        ---------------------------
                | act 1 | .... | act n|
        -------------------------------
        dev 1 | 123   |      | 123  |
        ...
        dev n | 123   |      | 123  |
    """
    labelled = label_data(df_devs, df_acts, idle=idle)
    # constant column that is counted; aggfunc=len only counts rows per cell
    labelled['val'] = 1
    pivot_options = {
        'index': 'device',
        'columns': 'activity',
        'values': 'val',
        'aggfunc': len,
        'fill_value': 0,
    }
    return pd.pivot_table(labelled, **pivot_options)
예제 #6
0
    def fit_transform(self, df_acts, X):
        """
        Fit on the activities and return the encoded labels for X.

        Parameters
        ----------
        df_acts : pd.DataFrame
            Recorded activities from a dataset. For more information refer to the
            :ref:`user guide<activity_dataframe>`.
        X :
            Data that is labelled against ``self.df_acts_`` through
            ``label_data`` (presumably a pd.DataFrame -- confirm).

        Returns
        -------
        df : pd.DataFrame
            Columns ``TIME`` and ``ACTIVITY``; ``ACTIVITY`` holds the encoded
            labels.
        """
        self.fit(df_acts)
        labelled = label_data(X, self.df_acts_, self.idle)
        # NOTE(review): the encoder is fitted a second time here, on the
        # labelled data, right after self.fit() -- confirm this is intended.
        codes = self.lbl_enc.fit_transform(labelled[ACTIVITY].values)
        columns = {
            TIME: labelled[TIME].values,
            ACTIVITY: codes
        }
        return pd.DataFrame(data=columns)
예제 #7
0
def contingency_table_triggers(df_devs, df_acts, idle=False):
    """
    Compute the amount of device triggers occurring during the different activities.

    Parameters
    ----------
    df_devs : pd.DataFrame
        All recorded devices from a dataset. For more information refer to
        :ref:`user guide<device_dataframe>`.
    df_acts : pd.DataFrame
        All recorded activities from a dataset. For more information refer to the
        :ref:`user guide<activity_dataframe>`.
    idle : bool
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.

    Examples
    --------
    >>> from pyadlml.stats import contingency_triggers
    >>> contingency_triggers(data.df_devices, data.df_activities)
    activity            get drink  go to bed  ...  use toilet
    device
    Cups cupboard              36          0  ...           0
    Dishwasher                  2          0  ...           0
               ...            ...        ...  ...         ...
    Washingmachine              0          0  ...           0
    [7 rows x 7 columns]

    Returns
    -------
    df : pd.DataFrame
        One row per device and one column per activity, filled with counts.
    """
    labelled = label_data(df_devs, df_acts, idle=idle)
    # constant column that is counted; aggfunc=len only counts rows per cell
    labelled[VAL] = 1
    pivot_options = {
        'index': DEVICE,
        'columns': ACTIVITY,
        'values': VAL,
        'aggfunc': len,
        'fill_value': 0,
    }
    return pd.pivot_table(labelled, **pivot_options)
예제 #8
0
def contingency_table_triggers_01(df_devs, df_acts, idle=False):
    """
    Count the rows of the labelled device data per (device, value) pair
    and activity.

    output:
        read like this: dev 1 turned 123 times from 1 to 0 while act 1 was present
        example
        ---------------------------
                | act 1 | .... | act n|
        -------------------------------
        dev 1 0 | 123   |      | 123  |
        dev 1 1 | 122   |      | 141  |
        ...
        dev n 0 | 123   |      | 123  |
        dev n 1 | 122   |      | 141  |
    """
    labelled = label_data(df_devs, df_acts, idle=idle)
    # 'val' becomes an index level, so an integer copy serves as the
    # column that is counted
    labelled['val2'] = labelled['val'].astype(int)
    pivot_options = {
        'index': ['device', 'val'],
        'columns': 'activity',
        'values': 'val2',
        'aggfunc': len,
        'fill_value': 0,
    }
    return pd.pivot_table(labelled, **pivot_options)
예제 #9
0
 def fit(self, df_activities):
     """Fit the label encoder on the activities of the labelled index data.

     Parameters
     ----------
     df_activities :
         Activity recordings handed to ``label_data`` together with
         ``self.df_index`` (presumably a pd.DataFrame -- confirm).
     """
     labelled = label_data(self.df_index, df_activities, self.idle)
     # start where the labeling begins: skip the first `window_size` rows
     trimmed = labelled.iloc[self.window_size:, :]
     activities = trimmed['activity'].values
     self._lbl_enc.fit(activities)
예제 #10
0
 def transform(self, df_activities):
     """Encode the activity labels of the labelled device data.

     Parameters
     ----------
     df_activities :
         Activity recordings handed to ``label_data`` together with
         ``self.df_devices`` (presumably a pd.DataFrame -- confirm).

     Returns
     -------
     pd.DataFrame
         One ``'activity'`` column with the encoded labels, indexed like
         the labelled frame.
     """
     labelled = label_data(self.df_devices, df_activities, self.idle)
     codes = self._lbl_enc.transform(labelled['activity'].values)
     return pd.DataFrame(data=codes,
                         index=labelled.index,
                         columns=['activity'])
예제 #11
0
 def fit(self, df_activities):
     """Fit the label encoder on the activities of the labelled device data.

     Parameters
     ----------
     df_activities :
         Activity recordings handed to ``label_data`` together with
         ``self.df_devices`` (presumably a pd.DataFrame -- confirm).
     """
     labelled = label_data(self.df_devices, df_activities, self.idle)
     activities = labelled['activity'].values
     self._lbl_enc.fit(activities)
예제 #12
0
def contingency_table_triggers_01(df_devs, df_acts, idle=False):
    """
    Compute the amount a device turns "on" or "off" respectively
    during the different activities.

    Parameters
    ----------
    df_devs : pd.DataFrame
        All recorded devices from a dataset. For more information refer to
        :ref:`user guide<device_dataframe>`.
    df_acts : pd.DataFrame
        All recorded activities from a dataset. For more information refer to the
        :ref:`user guide<activity_dataframe>`.
    idle : bool, optional, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.

    Examples
    --------
    >>> from pyadlml.stats import contingency_triggers_01
    >>> contingency_triggers_01(data.df_devices, data.df_activities)
    activity            get drink  go to bed  ... use toilet
    devices                                   ...
    Cups cupboard Off   18          0         ...          0
    Cups cupboard On    18          0         ...          0
    Dishwasher Off       1          0         ...          0
    Dishwasher On        1          0         ...          0
                   ... ...        ...         ...        ...
    Washingmachine Off   0          0         ...          0
    Washingmachine On    0          0         ...          0
    [14 rows x 7 columns]

    Returns
    -------
    df : pd.DataFrame
        One row per device and state, labelled ``"<device> On"`` or
        ``"<device> Off"`` in an index named ``devices``, and one column
        per activity, filled with counts.
    """
    dev_index = 'devices'
    ON = 'On'
    OFF = 'Off'
    df = label_data(df_devs, df_acts, idle=idle)

    # the state column becomes an index level, so an integer copy serves
    # as the column that is counted
    df['val2'] = df[VAL].astype(int)
    df = pd.pivot_table(df,
                        columns=ACTIVITY,
                        index=[DEVICE, VAL],
                        values='val2',
                        aggfunc=len,
                        fill_value=0)

    # Build the "<device> On/Off" row labels from the state value itself.
    # Searching the concatenated "<device><state>" string for "False" would
    # mislabel every device whose own name contains "False".
    df = df.reset_index()
    state_label = df[VAL].map(lambda state: ON if state else OFF)
    df[dev_index] = df[DEVICE] + ' ' + state_label
    df = df.set_index(dev_index)
    df = df.drop([DEVICE, VAL], axis=1)
    return df