def fetch_tuebingen_2019(keep_original=True, cache=True, retain_corrections=False):
    """ Fetches the tuebingen_2019 dataset from the internet. The original dataset
    or its cached version is stored in the :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=True
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file
        for faster subsequent access. For more information on how caching is
        used, refer to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during
        preprocessing are listed in the respective attributes of the data
        object. For more information about the attributes, refer to the
        :ref:`user guide <error_correction>`.

    Returns
    -------
    data : object
    """
    dataset_name = 'tuebingen_2019'

    def load_tuebingen_2019(folder_path):
        return act_assist.load(folder_path, subjects=['M'])

    data = _fetch_handler(keep_original, cache, dataset_name,
                          TUE_2019_FILENAME, TUE_2019_URL,
                          load_tuebingen_2019)
    return data
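# A minimal usage sketch for fetch_tuebingen_2019. The import path below is an
# assumption, mirroring the examples given for the other fetchers in this module;
# the `df_activities`/`df_devices` attributes follow the shared Data layout.
#
#   >>> from pyadlml.dataset import fetch_tuebingen_2019
#   >>> data = fetch_tuebingen_2019(cache=True)
#   >>> data.df_devices.head()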
def fetch_casas_aruba(keep_original=True, cache=True, retain_corrections=False):
    """ Fetches the casas aruba dataset from the internet. The original dataset
    or its cached version is stored in the :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=True
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file
        for quicker access. For more information on how caching is used, refer
        to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during
        preprocessing are listed in the respective attributes of the data
        object. For more information about the attributes, refer to the
        :ref:`user guide <error_correction>`.

    Examples
    --------
    >>> from pyadlml.dataset import fetch_casas_aruba
    >>> data = fetch_casas_aruba()
    >>> dir(data)
    [..., 'df_activities', 'df_devices', ...]

    Returns
    -------
    data : object
    """
    dataset_name = 'casas_aruba'

    def load_casas_aruba(folder_path):
        _fix_data(os.path.join(folder_path, "data"))
        data_path = os.path.join(folder_path, 'corrected_data.csv')
        df = _load_df(data_path)
        df_dev = _get_devices_df(df)
        df_act = _get_activity_df(df)

        df_dev = correct_devices(df_dev)
        df_act, cor_lst = correct_activities(df_act)

        lst_act = df_act[ACTIVITY].unique()
        lst_dev = df_dev[DEVICE].unique()
        data = Data(df_act, df_dev, activity_list=lst_act, device_list=lst_dev)
        if retain_corrections:
            data.correction_activities = cor_lst
        return data

    data = _fetch_handler(keep_original, cache, dataset_name,
                          CASAS_ARUBA_FILENAME, CASAS_ARUBA_URL,
                          load_casas_aruba)
    return data
def fetch_amsterdam(keep_original=False, cache=True, retain_corrections=False):
    """ Fetches the amsterdam dataset from the internet. The original dataset
    or its cached version is stored in the :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=False
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file
        for quicker access. For more information on how caching is used, refer
        to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during
        preprocessing are listed in the respective attributes of the data
        object. For more information about error correction, refer to the
        :ref:`user guide <error_correction>`.

    Examples
    --------
    >>> from pyadlml.dataset import fetch_amsterdam
    >>> data = fetch_amsterdam()
    >>> dir(data)
    [..., 'df_activities', 'df_devices', ...]

    Returns
    -------
    data : object
    """
    dataset_name = 'amsterdam'

    def load_amsterdam(folder_path):
        device_fp = os.path.join(folder_path, "kasterenSenseData.txt")
        activity_fp = os.path.join(folder_path, "kasterenActData.txt")
        df_act = _load_activities(activity_fp)
        df_dev = _load_devices(device_fp)
        df_act, cor_lst = correct_activities(df_act)
        df_dev = correct_devices(df_dev)

        lst_act = df_act[ACTIVITY].unique()
        lst_dev = df_dev[DEVICE].unique()
        data = Data(df_act, df_dev, activity_list=lst_act, device_list=lst_dev)
        if retain_corrections:
            data.correction_activities = cor_lst
        return data

    data = _fetch_handler(keep_original, cache, dataset_name,
                          AMSTERDAM_FILENAME, AMSTERDAM_URL,
                          load_amsterdam)
    return data
def fetch_uci_adl_binary(keep_original=True, cache=True, retain_corrections=False,
                         subject='OrdonezA'):
    """ Fetches the uci_adl_binary dataset from the internet. The original dataset
    or its cached version is stored in the :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=True
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file
        for quicker access. For more information on how caching is used, refer
        to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during
        preprocessing are listed in the respective attributes of the data
        object. For more information about the attributes, refer to the
        :ref:`user guide <error_correction>`.
    subject : str of {'OrdonezA', 'OrdonezB'}, default='OrdonezA'
        Determines which of the two households' datasets is loaded.

    Returns
    -------
    data : object
    """
    assert subject in ['OrdonezA', 'OrdonezB']
    dataset_name = 'uci_adl_binary'

    def load_uci_adl_binary(folder_path):
        sub_dev_file = os.path.join(folder_path, '{}_Sensors.txt'.format(subject))
        if subject == 'OrdonezB':
            fix_OrdonezB_ADLS(os.path.join(folder_path, 'OrdonezB_ADLs.txt'))
            sub_act_file = os.path.join(folder_path, '{}_ADLs_corr.txt'.format(subject))
        else:
            sub_act_file = os.path.join(folder_path, '{}_ADLs.txt'.format(subject))
        return load(sub_dev_file, sub_act_file, retain_corrections, subject)

    data = _fetch_handler(keep_original, cache, dataset_name,
                          UCI_ADL_BINARY_FILENAME, UCI_ADL_BINARY_URL,
                          load_uci_adl_binary, data_postfix=subject)
    return data
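# A minimal usage sketch for fetch_uci_adl_binary, assuming the same import path
# as the other examples in this module. `subject` selects one of the two
# households; the cached binary is suffixed per subject (`data_postfix=subject`),
# so both variants can be kept side by side without refetching.
#
#   >>> from pyadlml.dataset import fetch_uci_adl_binary
#   >>> data_a = fetch_uci_adl_binary(subject='OrdonezA')
#   >>> data_b = fetch_uci_adl_binary(subject='OrdonezB', retain_corrections=True)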
def fetch_aras(keep_original=True, cache=True, retain_corrections=False):
    """ Fetches the aras dataset from the internet. The original dataset
    or its cached version is stored in the :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=True
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file
        for quicker access. For more information on how caching is used, refer
        to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during
        preprocessing are listed in the respective attributes of the data
        object. For more information about the attributes, refer to the
        :ref:`user guide <error_correction>`.

    Examples
    --------
    >>> from pyadlml.dataset import fetch_aras
    >>> data = fetch_aras()
    >>> dir(data)
    [..., 'df_activities', 'df_devices', ...]

    Returns
    -------
    data : object
    """
    dataset_name = 'aras'

    def load_aras(data_path):
        device_map = _get_device_map(data_path)
        activity_map = _get_activity_map(data_path)
        df = _read_data(data_path, activity_map, device_map)

        df_res1_act = _create_activity_df(df, 'Resident 1')
        df_res2_act = _create_activity_df(df, 'Resident 2')
        # TODO correct activities
        assert not _is_activity_overlapping(df_res1_act) \
               and not _is_activity_overlapping(df_res2_act)

        df_dev = _create_device_df(df)
        df_dev = correct_devices(df_dev)

        lst_res1_act = df_res1_act[ACTIVITY].unique()
        lst_res2_act = df_res2_act[ACTIVITY].unique()
        lst_dev = df_dev[DEVICE].unique()

        data = Data(None, df_dev, activity_list=None, device_list=lst_dev)
        data.df_dev_map = device_map
        data.df_act_map = activity_map
        data.df_activities_res1 = df_res1_act
        data.lst_activities_res1 = lst_res1_act
        data.df_activities_res2 = df_res2_act
        data.lst_activities_res2 = lst_res2_act

        if retain_corrections:
            data.correction_activities_res1 = []
            data.correction_activities_res2 = []
        return data

    data = _fetch_handler(keep_original, cache, dataset_name,
                          ARAS_FILENAME, ARAS_URL, load_aras)
    return data
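# A usage sketch for fetch_aras. Unlike the single-resident fetchers, the
# returned object splits activities per resident; the attribute names below are
# the ones assigned in load_aras above.
#
#   >>> from pyadlml.dataset import fetch_aras
#   >>> data = fetch_aras()
#   >>> data.lst_activities_res1
#   >>> data.df_activities_res2.head()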
def fetch_mitlab(keep_original=False, cache=True, retain_corrections=False,
                 subject='subject1'):
    """ Fetches the :ref:`mitlab <ds_mitlab>` dataset from the internet. The
    original dataset or its cached version is stored in the
    :ref:`data home <storage>` folder.

    Parameters
    ----------
    keep_original : bool, default=False
        Determines whether the original dataset is deleted after downloading
        or kept on the hard drive.
    cache : bool, default=True
        Determines whether the data object should be stored as a binary file
        for quicker access. For more information on how caching is used, refer
        to the :ref:`user guide <storage>`.
    retain_corrections : bool, default=False
        When set to *True*, data points that are changed or dropped during
        preprocessing are listed in the respective attributes of the data
        object. For more information about error correction, refer to the
        :ref:`user guide <error_correction>`.
    subject : str of {'subject1', 'subject2'}, default='subject1'
        Determines which of the two subjects' recordings is loaded.

    Returns
    -------
    data : object
    """
    assert subject in ['subject1', 'subject2']
    dataset_name = 'mitlab'

    def load_mitlab(folder_path):
        act_path = os.path.join(folder_path, subject, "Activities.csv")
        dev_path = os.path.join(folder_path, subject, "sensors.csv")
        data_path = os.path.join(folder_path, subject, "activities_data.csv")

        df_dev_map = _load_device_map(dev_path)
        df_act_map = _load_activity_map(act_path)
        df_dev, df_act = _read_data(data_path, df_dev_map, df_act_map)

        df_act, cor_lst = correct_activities(df_act)
        df_dev = correct_devices(df_dev)

        lst_act = df_act[ACTIVITY].unique()
        lst_dev = df_dev[DEVICE].unique()
        data = Data(df_act, df_dev, activity_list=lst_act, device_list=lst_dev)
        data.df_dev_map = df_dev_map
        data.df_act_map = df_act_map
        if retain_corrections:
            data.correction_activities = cor_lst
        return data

    data = _fetch_handler(keep_original, cache, dataset_name,
                          MITLAB_FILENAME, MITLAB_URL,
                          load_mitlab, data_postfix=subject)
    return data
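# A usage sketch for fetch_mitlab, assuming the import path used by the other
# examples in this module. `correction_activities` is only attached when
# retain_corrections=True; `df_act_map` and `df_dev_map` are set unconditionally.
#
#   >>> from pyadlml.dataset import fetch_mitlab
#   >>> data = fetch_mitlab(subject='subject2', retain_corrections=True)
#   >>> data.correction_activities
#   >>> data.df_act_map.head()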