Example #1
def fetch_tuebingen_2019(keep_original=True, cache=True, retain_corrections=False):
    """
    Fetches the tuebingen_2019 dataset from the internet. The original dataset or its cached version
    is stored in the :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=True
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file for faster
        subsequent access.
        For more information on how caching is used, refer to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during preprocessing
        are listed in the respective attributes of the data object. For more information
        about the attributes refer to the :ref:`user guide <error_correction>`.

    Returns
    -------
    data : object
    """
    dataset_name = 'tuebingen_2019'

    def load_tuebingen_2019(folder_path):
        return act_assist.load(folder_path, subjects=['M'])

    data = _fetch_handler(keep_original, cache, dataset_name,
                          TUE_2019_FILENAME, TUE_2019_URL,
                          load_tuebingen_2019)
    return data
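
A minimal usage sketch, assuming fetch_tuebingen_2019 is exported from pyadlml.dataset like the other fetchers in this collection (the df_activities/df_devices attributes follow the doctests shown in the later examples):

from pyadlml.dataset import fetch_tuebingen_2019

# The first call downloads the archive into the data home; subsequent
# calls hit the binary cache because cache=True.
data = fetch_tuebingen_2019(keep_original=True, cache=True)
print(data.df_activities.head())
print(data.df_devices.head())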
Example #2
def fetch_casas_aruba(keep_original=True,
                      cache=True,
                      retain_corrections=False):
    """
    Fetches the CASAS Aruba dataset from the internet. The original dataset or its cached version
    is stored in the :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=True
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file for quicker access.
        For more information on how caching is used, refer to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during preprocessing
        are listed in the respective attributes of the data object. For more information
        about the attributes refer to the :ref:`user guide <error_correction>`.

    Examples
    --------
    >>> from pyadlml.dataset import fetch_casas_aruba
    >>> data = fetch_casas_aruba()
    >>> dir(data)
    [..., 'df_activities', 'df_devices', ...]

    Returns
    -------
    data : object
    """
    dataset_name = 'casas_aruba'

    def load_casas_aruba(folder_path):
        _fix_data(os.path.join(folder_path, "data"))

        data_path = os.path.join(folder_path, 'corrected_data.csv')

        df = _load_df(data_path)
        df_dev = _get_devices_df(df)
        df_act = _get_activity_df(df)

        df_dev = correct_devices(df_dev)
        df_act, cor_lst = correct_activities(df_act)

        lst_act = df_act[ACTIVITY].unique()
        lst_dev = df_dev[DEVICE].unique()

        data = Data(df_act, df_dev, activity_list=lst_act, device_list=lst_dev)
        # Attach the correction list only on request, mirroring the other
        # fetchers; previously cor_lst was computed but never used.
        if retain_corrections:
            data.correction_activities = cor_lst
        return data

    data = _fetch_handler(keep_original, cache, dataset_name,
                          CASAS_ARUBA_FILENAME, CASAS_ARUBA_URL,
                          load_casas_aruba)
    return data
Example #3
def fetch_amsterdam(keep_original=False, cache=True, retain_corrections=False):
    """
    Fetches the Amsterdam dataset from the internet. The original dataset or its cached version
    is stored in the :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=False
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file for quicker access.
        For more information on how caching is used, refer to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during preprocessing
        are listed in the respective attributes of the data object. For more information
        about error correction refer to the :ref:`user guide <error_correction>`.

    Examples
    --------
    >>> from pyadlml.dataset import fetch_amsterdam
    >>> data = fetch_amsterdam()
    >>> dir(data)
    [..., 'df_activities', 'df_devices', ...]

    Returns
    -------
    data : object
    """
    dataset_name = 'amsterdam'

    def load_amsterdam(folder_path):
        device_fp = os.path.join(folder_path, "kasterenSenseData.txt")
        activity_fp = os.path.join(folder_path, "kasterenActData.txt")

        df_act = _load_activities(activity_fp)
        df_dev = _load_devices(device_fp)
        df_act, cor_lst = correct_activities(df_act)

        df_dev = correct_devices(df_dev)
        lst_act = df_act[ACTIVITY].unique()
        lst_dev = df_dev[DEVICE].unique()

        data = Data(df_act, df_dev, activity_list=lst_act, device_list=lst_dev)
        if retain_corrections:
            data.correction_activities = cor_lst
        return data

    data = _fetch_handler(keep_original, cache, dataset_name,
                          AMSTERDAM_FILENAME, AMSTERDAM_URL,
                          load_amsterdam)
    return data
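
Because this variant threads retain_corrections through the inner loader, the corrections can be inspected after fetching; a short sketch (the correction_activities attribute name is taken directly from the code above):

from pyadlml.dataset import fetch_amsterdam

data = fetch_amsterdam(retain_corrections=True)
# correction_activities is attached only when retain_corrections=True.
print(data.correction_activities)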
Example #4
def fetch_uci_adl_binary(keep_original=True,
                         cache=True,
                         retain_corrections=False,
                         subject='OrdonezA'):
    """

    Parameters
    ----------
    keep_original : bool, default=True
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file for quicker access.
        For more information on how caching is used, refer to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during preprocessing
        are listed in the respective attributes of the data object. For more information
        about the attributes refer to the :ref:`user guide <error_correction>`.
    subject : str of {'OrdonezA', 'OrdonezB'}, default='OrdonezA'
        Decides which of the two houses' datasets is loaded.

    Returns
    -------
    data : object
    """
    assert subject in ['OrdonezA', 'OrdonezB']
    dataset_name = 'uci_adl_binary'

    def load_uci_adl_binary(folder_path):
        sub_dev_file = os.path.join(folder_path,
                                    '{}_Sensors.txt'.format(subject))
        if subject == 'OrdonezB':
            fix_OrdonezB_ADLS(os.path.join(folder_path, 'OrdonezB_ADLs.txt'))
            sub_act_file = os.path.join(folder_path,
                                        '{}_ADLs_corr.txt'.format(subject))
        else:
            sub_act_file = os.path.join(folder_path,
                                        '{}_ADLs.txt'.format(subject))

        return load(sub_dev_file, sub_act_file, retain_corrections, subject)

    data = _fetch_handler(keep_original,
                          cache,
                          dataset_name,
                          UCI_ADL_BINARY_FILENAME,
                          UCI_ADL_BINARY_URL,
                          load_uci_adl_binary,
                          data_postfix=subject)
    return data
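
Since the handler caches per subject via data_postfix=subject, the two houses can be fetched side by side without clobbering each other's cache; a usage sketch, assuming fetch_uci_adl_binary is exported from pyadlml.dataset like the other fetchers here:

from pyadlml.dataset import fetch_uci_adl_binary

# Each subject gets its own cache file (data_postfix=subject), so both
# houses can be held on disk at the same time.
data_a = fetch_uci_adl_binary(subject='OrdonezA')
data_b = fetch_uci_adl_binary(subject='OrdonezB')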
Example #5
def fetch_amsterdam(keep_original=True,
                    cache=True,
                    remember_corrections=False):
    """
    Fetches the Amsterdam dataset from the internet. The original dataset or its cached version
    is stored in the :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=True
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file for quicker access.
        For more information on how caching is used, refer to the :ref:`user guide <storage>`.
    remember_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during preprocessing
        are listed in the respective attributes of the data object. For more information
        about the attributes refer to the :ref:`user guide <error_correction>`.

    Examples
    --------
    >>> from pyadlml.dataset import fetch_amsterdam
    >>> data = fetch_amsterdam()
    >>> dir(data)
    [..., 'df_activities', 'df_devices', ...]

    Returns
    -------
    data : object
    """
    dataset_name = 'amsterdam'

    def load_amsterdam(folder_path):
        sensorData = os.path.join(folder_path, "kasterenSenseData.txt")
        activityData = os.path.join(folder_path, "kasterenActData.txt")
        return amsterdam.load(sensorData, activityData)

    data = _fetch_handler(keep_original, cache, dataset_name,
                          AMSTERDAM_FILENAME, AMSTERDAM_URL, load_amsterdam)
    return data
Example #6
def fetch_mitlab(keep_original=True, cache=True, subject='subject1'):
    """

    Parameters
    ----------
    keep_original : bool, default=True
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file for quicker access.
        For more information on how caching is used, refer to the :ref:`user guide <storage>`.
    subject : str of {'subject1', 'subject2'}, default='subject1'
        Decides which of the two subjects' datasets is loaded.

    Returns
    -------
    data : object
    """
    assert subject in ['subject1', 'subject2']
    dataset_name = 'mitlab'

    def load_mitlab(folder_path):
        sub_act = os.path.join(folder_path, subject, "Activities.csv")
        sub_dev = os.path.join(folder_path, subject, "sensors.csv")
        sub_data = os.path.join(folder_path, subject, "activities_data.csv")
        return mitlab.load(sub_dev, sub_act, sub_data)

    data = _fetch_handler(keep_original,
                          cache,
                          dataset_name,
                          MITLAB_FILENAME,
                          MITLAB_URL,
                          load_mitlab,
                          data_postfix=subject)
    return data
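
A usage sketch in the spirit of the doctests in the other examples (the pyadlml.dataset import path is assumed; subject also postfixes the cache file, so both subjects can coexist):

from pyadlml.dataset import fetch_mitlab

# Load the second subject's recordings instead of the default.
data = fetch_mitlab(subject='subject2')
print(data.df_activities.head())
print(data.df_devices.head())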
Example #7
def fetch_aras(keep_original=True, cache=True, retain_corrections=False):
    """
    Fetches the ARAS dataset from the internet. The original dataset or its cached version
    is stored in the :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=True
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file for quicker access.
        For more information on how caching is used, refer to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during preprocessing
        are listed in the respective attributes of the data object. For more information
        about the attributes refer to the :ref:`user guide <error_correction>`.

    Examples
    --------
    >>> from pyadlml.dataset import fetch_aras
    >>> data = fetch_aras()
    >>> dir(data)
    [..., 'df_activities', 'df_devices', ...]

    Returns
    -------
    data : object
    """
    dataset_name = 'aras'

    def load_aras(data_path):
        device_map = _get_device_map(data_path)
        activity_map = _get_activity_map(data_path)
        df = _read_data(data_path, activity_map, device_map)

        df_res1_act = _create_activity_df(df, 'Resident 1')
        df_res2_act = _create_activity_df(df, 'Resident 2')

        # TODO correct activities; neither resident's activities may overlap
        assert not _is_activity_overlapping(df_res1_act) \
            and not _is_activity_overlapping(df_res2_act)

        df_dev = _create_device_df(df)

        df_dev = correct_devices(df_dev)

        lst_res1_act = df_res1_act[ACTIVITY].unique()
        lst_res2_act = df_res2_act[ACTIVITY].unique()
        lst_dev = df_dev[DEVICE].unique()

        data = Data(None, df_dev, activity_list=None, device_list=lst_dev)

        data.df_dev_map = device_map
        data.df_act_map = activity_map

        data.df_activities_res1 = df_res1_act
        data.lst_activities_res1 = lst_res1_act

        data.df_activities_res2 = df_res2_act
        data.lst_activities_res2 = lst_res2_act

        if retain_corrections:
            data.correction_activities_res1 = []
            data.correction_activities_res2 = []

        return data

    data = _fetch_handler(keep_original, cache, dataset_name, ARAS_FILENAME,
                          ARAS_URL, load_aras)
    return data
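
Because load_aras attaches per-resident activity frames instead of a single df_activities, access looks slightly different; the attribute names below come straight from the code above:

from pyadlml.dataset import fetch_aras

data = fetch_aras()
# Device events are shared between residents; activities are split.
print(data.df_devices.head())
print(data.df_activities_res1.head())
print(data.df_activities_res2.head())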
Example #8
def fetch_mitlab(keep_original=False,
                 cache=True,
                 retain_corrections=False,
                 subject='subject1'):
    """
    Fetches the :ref:`mitlab <ds_mitlab>` dataset from the internet. The original dataset or its cached version
    is stored in the :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=False
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file for quicker access.
        For more information on how caching is used, refer to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during preprocessing
        are listed in the respective attributes of the data object. For more information
        about error correction refer to the :ref:`user guide <error_correction>`.
    subject : str of {'subject1', 'subject2'}, default='subject1'
        Determines which of the two subjects' datasets is loaded.

    Returns
    -------
    data : object
    """
    assert subject in ['subject1', 'subject2']
    dataset_name = 'mitlab'

    def load_mitlab(folder_path):
        act_path = os.path.join(folder_path, subject, "Activities.csv")
        dev_path = os.path.join(folder_path, subject, "sensors.csv")
        data_path = os.path.join(folder_path, subject, "activities_data.csv")

        df_dev_map = _load_device_map(dev_path)
        df_act_map = _load_activity_map(act_path)
        df_dev, df_act = _read_data(data_path, df_dev_map, df_act_map)

        df_act, cor_lst = correct_activities(df_act)
        df_dev = correct_devices(df_dev)

        lst_act = df_act[ACTIVITY].unique()
        lst_dev = df_dev[DEVICE].unique()

        data = Data(df_act, df_dev, activity_list=lst_act, device_list=lst_dev)

        data.df_dev_map = df_dev_map
        data.df_act_map = df_act_map

        if retain_corrections:
            data.correction_activities = cor_lst

        return data

    data = _fetch_handler(keep_original,
                          cache,
                          dataset_name,
                          MITLAB_FILENAME,
                          MITLAB_URL,
                          load_mitlab,
                          data_postfix=subject)
    return data
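
A short sketch exercising retain_corrections (correction_activities is the attribute set in the code above; the import path mirrors the doctests elsewhere in this collection):

from pyadlml.dataset import fetch_mitlab

data = fetch_mitlab(subject='subject1', retain_corrections=True)
# Entries changed or dropped during preprocessing are kept only on request.
print(data.correction_activities)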