Example 1
def _preprocess_sensor_data(item, all_items, **kwargs):
    # get session boundaries
    metas = GroupBy.get_meta(item)

    # load data
    data_loader = delayed(fileio.load_sensor)
    loaded_data = data_loader(GroupBy.get_data(item))
    # apply offset mapping
    get_offset = partial(dataset.get_offset, offset_column=1)
    offset_in_secs = delayed(get_offset)(GroupBy.get_data(item))
    offset_data = delayed(dataframe.offset)(loaded_data, offset_in_secs)

    # apply orientation corrections
    orientation_correction = delayed(dataset.get_orientation_correction)(
        GroupBy.get_data(item))

    flip_and_swap = apply_on_accelerometer_dataframe(orientation.flip_and_swap)

    corrected_data = delayed(flip_and_swap)(offset_data,
                                            x_flip=orientation_correction[0],
                                            y_flip=orientation_correction[1],
                                            z_flip=orientation_correction[2])

    dataset_name = kwargs['dataset_name']

    corrected_data = delayed(save_to_file)(corrected_data, metas, dataset_name)

    return GroupBy.bundle(corrected_data, **metas)
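These preprocessing functions never execute anything themselves: each delayed(...) call only records a node in a dask task graph, and the graph runs later when compute() is called (as the driver in Example 9 does with scheduler='processes'). Below is a minimal self-contained sketch of the same pattern, with hypothetical stand-in functions in place of fileio.load_sensor and dataframe.offset:

from dask import delayed

def load(path):
    # stand-in for a sensor-file loader
    return list(range(10))

def shift(values, offset):
    # stand-in for dataframe.offset
    return [v + offset for v in values]

loaded = delayed(load)('fake.sensor.csv')   # graph node, not executed yet
shifted = delayed(shift)(loaded, 5)         # depends on `loaded`
print(shifted.compute())                    # the graph executes only here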
Example 2
def preprocess_annotations(item, all_items, **kwargs):
    # get session boundaries
    metas = GroupBy.get_meta(item)

    # load data
    data_loader = delayed(fileio.load_annotation)
    loaded_data = data_loader(GroupBy.get_data(item))

    return GroupBy.bundle(loaded_data, **metas)
Example 3
def load_data(item, all_items, **kwargs):
    # get session boundaries
    metas = GroupBy.get_meta(item)

    # load data
    data_loader = delayed(fileio.load_sensor)
    loaded_data = data_loader(GroupBy.get_data(item))

    return GroupBy.bundle(loaded_data, **metas)
Example 4
def count_total_rows(data, all_data, **kwargs):
    @delayed
    def load_data(data):
        # parse the first column as timestamps while reading
        return pd.read_csv(data, parse_dates=[0], infer_datetime_format=True)

    @delayed
    def count(data):
        # number of rows in the loaded dataframe
        return data.shape[0]

    df = load_data(GroupBy.get_data(data))
    return GroupBy.bundle(count(df))
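The @delayed decorator used here is just the decorator form of the delayed(...) wrapping in Examples 1 to 3: both produce a lazy callable whose result is computed only on demand. A minimal sketch:

from dask import delayed

@delayed
def count(data):
    return len(data)

lazy = count([1, 2, 3])   # a Delayed object, nothing has run yet
print(lazy.compute())     # 3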
Example 5
def load_data(item, all_items, *, old_sr, new_sr, **kwargs):
    # get session boundaries
    metas = GroupBy.get_meta(item)

    # load data
    data_loader = delayed(fileio.load_sensor)
    loaded_data = data_loader(GroupBy.get_data(item))
    if old_sr == new_sr:
        resampled_data = loaded_data
    else:
        print('resampling raw data from {} to {}'.format(old_sr, new_sr))
        resampled_data = resample_data(loaded_data,
                                       old_sr=old_sr,
                                       new_sr=new_sr)
    return GroupBy.bundle(resampled_data, **metas)
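resample_data is not defined in these examples. A minimal sketch of what such a helper could look like, assuming the loaded dataframe carries a DatetimeIndex and sampling rates in Hz; the name and keyword signature simply mirror the call above, and the delayed wrapper is needed because loaded_data is itself lazy:

import pandas as pd
from dask import delayed

@delayed
def resample_data(df, *, old_sr, new_sr):
    # ASSUMPTION: df is indexed by timestamp; mhealth data would need
    # its HEADER_TIME_STAMP column set as the index first.
    # old_sr is kept only for signature parity with the call above.
    rule = '{:d}us'.format(int(1e6 / new_sr))   # target sample period
    # mean-aggregate when downsampling, fill gaps when upsampling
    return df.resample(rule).mean().interpolate(method='linear')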
Example 6
def sum_rows(group_items, **kwargs):
    # unwrap each item's row count, then sum across the whole group
    group_items = [GroupBy.get_data(group_item) for group_item in group_items]
    return GroupBy.bundle(np.sum(group_items))
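Example 6 is the reduce step that pairs with a per-file mapper such as Example 4's count_total_rows: each group item carries one row count, and sum_rows collapses them into a single total for the group.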
Example 7
def greater_than_zero(data, all_data):
    return GroupBy.bundle(GroupBy.get_data(data) > 0)
Example 8
def load_data(data, all_data, **kwargs):
    metas = GroupBy.get_meta(data)
    return GroupBy.bundle(delayed(fileio.load_sensor)(GroupBy.get_data(data)), **metas)
Example 9
def join_as_dataframe(groups):
    # concatenate each group's dataframes, tagging rows with the group name
    group_dfs = []
    for group_name in groups:
        group_df = pd.concat(groups[group_name])
        group_df['GROUP_NAME'] = group_name
        group_dfs.append(group_df)
    result = pd.concat(group_dfs)
    return result

if __name__ == '__main__':
    import pprint
    from glob import glob
    from padar_parallel.groupby import GroupBy
    from padar_parallel.grouper import MHealthGrouper
    from padar_converter.mhealth import dataset
    input_files = glob(
        'D:/data/spades_lab/SPADES_[1-9]/MasterSynced/**/Actigraph*.sensor.csv', recursive=True)
    pprint.pprint(input_files)
    grouper = MHealthGrouper(input_files)
    groupby_obj = GroupBy(
        input_files, **MhealthWindowing.make_metas(input_files))
    groupby_obj.split(grouper.pid_group(),
                      grouper.sid_group(),
                      group_types=['PID', 'SID'],
                      ingroup_sortkey_func=lambda x: dataset.get_file_timestamp(GroupBy.get_data(x)))
    groupby_obj.apply(load_data)
    groupby_obj.apply(sampling_rate, interval=12.8, step=12.8) \
        .final_join(join_func=delayed(join_as_dataframe))

    groupby_obj.visualize_workflow(filename='test_apply_by_window.pdf')
    result = groupby_obj.compute(
        scheduler='processes').show_profiling().get_result()
    result.to_csv('test.csv', index=True)
Example 10
def load_data(item, all_items):
    metas = GroupBy.get_meta(item)
    data_loader = delayed(fileio.load_sensor)
    return GroupBy.bundle(data_loader(GroupBy.get_data(item)), **metas)
Example 11
def sort_func(item):
    return dataset.get_file_timestamp(GroupBy.get_data(item))
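Examples 10 and 11 are meant to be used together; a driver in the style of Example 9's __main__ block would look roughly like the sketch below. The glob pattern is a placeholder, and Example 9 additionally passes windowing metas to the GroupBy constructor:

from glob import glob
from dask import delayed
from padar_parallel.groupby import GroupBy
from padar_parallel.grouper import MHealthGrouper

input_files = glob('./data/**/*.sensor.csv', recursive=True)  # placeholder

grouper = MHealthGrouper(input_files)
groupby_obj = GroupBy(input_files)
groupby_obj.split(grouper.pid_group(),
                  grouper.sid_group(),
                  group_types=['PID', 'SID'],
                  ingroup_sortkey_func=sort_func)          # Example 11
groupby_obj.apply(load_data) \
    .final_join(join_func=delayed(join_as_dataframe))      # Examples 10 and 9
result = groupby_obj.compute(scheduler='processes').get_result()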