Python CreateDataset.add_event_datasetの例、Chapter2.CreateDataset.CreateDataset.add_event_dataset Pythonの例

コード例 #1

0

ファイルを表示

ファイル: crowdsignals_ch2.py プロジェクト: Courses-VU/ML4QS

    # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch
    # and aggregate the values per timestep by averaging the values/
    DataSet.add_numerical_dataset('gyroscope_phone.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'gyr_phone_')
    DataSet.add_numerical_dataset('gyroscope_smartwatch.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'gyr_watch_')

    # We add the heart rate (continuous numerical measurements) and aggregate by averaging again
    DataSet.add_numerical_dataset('heart_rate_smartwatch.csv', 'timestamps',
                                  ['rate'], 'avg', 'hr_watch_')

    # We add the labels provided by the users. These are categorical events that might overlap. We add them
    # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it
    # occurs within an interval).
    DataSet.add_event_dataset('labels.csv', 'label_start', 'label_end',
                              'label', 'binary')

    # We add the amount of light sensed by the phone (continuous numerical measurements) and aggregate by averaging again
    DataSet.add_numerical_dataset('light_phone.csv', 'timestamps', ['lux'],
                                  'avg', 'light_phone_')

    # We add the magnetometer data (continuous numerical measurements) of the phone and the smartwatch
    # and aggregate the values per timestep by averaging the values
    DataSet.add_numerical_dataset('magnetometer_phone.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'mag_phone_')
    DataSet.add_numerical_dataset('magnetometer_smartwatch.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'mag_watch_')

    # We add the pressure sensed by the phone (continuous numerical measurements) and aggregate by averaging again
    DataSet.add_numerical_dataset('pressure_phone.csv', 'timestamps',
                                  ['pressure'], 'avg', 'press_phone_')

コード例 #2

0

ファイルを表示

def main():
    # Set a granularity (the discrete step size of our time series data) and choose if all resulting datasets should
    # be saved. A course-grained granularity of one instance per minute, and a fine-grained one with four instances
    # per second are used.
    GRANULARITIES = [60000, 250]
    SAVE_VERSIONS = False

    # We can call Path.mkdir(exist_ok=True) to make any required directories if they don't already exist.
    [path.mkdir(exist_ok=True, parents=True) for path in [DATASET_PATH, RESULT_PATH]]

    # Create object to visualize the data and save figures
    DataViz = VisualizeDataset(module_path=__file__)

    datasets = []
    for milliseconds_per_instance in GRANULARITIES:
        print(
            f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.')

        # Create an initial dataset object with the base directory for our data and a granularity and add selected
        # measurements to it
        data_engineer = CreateDataset(base_dir=DATASET_PATH, granularity=milliseconds_per_instance)

        # Add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='accelerometer_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='acc_phone_')
        data_engineer.add_numerical_dataset(file='accelerometer_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='acc_watch_')

        # Add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='gyroscope_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='gyr_phone_')
        data_engineer.add_numerical_dataset(file='gyroscope_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='gyr_watch_')

        # Add the heart rate (continuous numerical measurements) and aggregate by averaging the values
        data_engineer.add_numerical_dataset(file='heart_rate_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['rate'], aggregation='avg', prefix='hr_watch_')

        # Add the labels provided by the users as binary attributes (i.e. add a one to the attribute representing the
        # specific value for a label if it occurs within an interval). These are categorical events that might overlap.
        data_engineer.add_event_dataset(file='labels.csv', start_timestamp_col='label_start',
                                        end_timestamp_col='label_end',
                                        value_col='label', aggregation='binary')

        # Add the amount of light sensed by the phone (continuous numerical measurements) and aggregate by averaging
        data_engineer.add_numerical_dataset(file='light_phone.csv', timestamp_col='timestamps', value_cols=['lux'],
                                            aggregation='avg', prefix='light_phone_')

        # Add the magnetometer data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='magnetometer_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='mag_phone_')
        data_engineer.add_numerical_dataset(file='magnetometer_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='mag_watch_')

        # Add the pressure sensed by the phone (continuous numerical measurements) and aggregate by averaging again
        data_engineer.add_numerical_dataset(file='pressure_phone.csv', timestamp_col='timestamps',
                                            value_cols=['pressure'],
                                            aggregation='avg', prefix='press_phone_')

        # Get the resulting pandas data table
        dataset = data_engineer.data_table

        # Create boxplots
        DataViz.plot_dataset_boxplot(dataset=dataset, cols=['acc_phone_x', 'acc_phone_y', 'acc_phone_z', 'acc_watch_x',
                                                            'acc_watch_y', 'acc_watch_z'])

        # Plot all data
        DataViz.plot_dataset(data_table=dataset,
                             columns=['acc_', 'gyr_', 'hr_watch_rate', 'light_phone_lux', 'mag_', 'press_phone_',
                                      'label'],
                             match=['like', 'like', 'like', 'like', 'like', 'like', 'like', 'like'],
                             display=['line', 'line', 'line', 'line', 'line', 'line', 'points', 'points'])

        # Print a summary of the dataset
        util.print_statistics(dataset=dataset)
        datasets.append(copy.deepcopy(dataset))

        # Save the various versions of the created datasets with logical filenames if needed
        if SAVE_VERSIONS:
            dataset.to_csv(RESULT_PATH / f'chapter2_result_{milliseconds_per_instance}')

    # Make a table like the one shown in the book, comparing the two datasets produced
    util.print_latex_table_statistics_two_datasets(dataset1=datasets[0], dataset2=datasets[1])

    # Finally, store the last dataset we generated (250 ms)
    dataset.to_csv(RESULT_PATH / RESULT_FNAME)

コード例 #3

0

ファイルを表示

ファイル: our_ch2.py プロジェクト: tomescumihail93/ML4QS

#    DataSetCS.add_numerical_dataset('accelerometer_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_phone_')
#    DataSetCS.add_numerical_dataset('accelerometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_watch_')

    # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch
    # and aggregate the values per timestep by averaging the values/
    DataSetOwn.add_numerical_dataset('gyro_custom.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_phone_')
#    DataSetCS.add_numerical_dataset('gyroscope_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_phone_')
#    DataSetCS.add_numerical_dataset('gyroscope_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_watch_')

    # We add the heart rate (continuous numerical measurements) and aggregate by averaging again
#    DataSetCS.add_numerical_dataset('heart_rate_smartwatch.csv', 'timestamps', ['rate'], 'avg', 'hr_watch_')

    # We add the labels provided by the users. These are categorical events that might overlap. We add them
    # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it
    # occurs within an interval).
    DataSetOwn.add_event_dataset('labels_custom.csv', 'label_start', 'label_end', 'label', 'binary')
#    DataSetCS.add_event_dataset('labels.csv', 'label_start', 'label_end', 'label', 'binary')

    # We add the amount of light sensed by the phone (continuous numerical measurements) and aggregate by averaging again
    # amount of light sensed is missing from our dataset
#    DataSetCS.add_numerical_dataset('light_phone.csv', 'timestamps', ['lux'], 'avg', 'light_phone_')

    # We add the magnetometer data (continuous numerical measurements) of the phone and the smartwatch
    # and aggregate the values per timestep by averaging the values
    DataSetOwn.add_numerical_dataset('mag_custom.csv', 'timestamps', ['x','y','z'], 'avg', 'mag_phone_')
    DataSetOwn.add_numerical_dataset('press_custom.csv', 'timestamps', ['pressure'], 'avg', 'press_phone_')
#    DataSetCS.add_numerical_dataset('magnetometer_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'mag_phone_')
#    DataSetCS.add_numerical_dataset('magnetometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'mag_watch_')

    # We add the pressure sensed by the phone (continuous numerical measurements) and aggregate by averaging again
    DataSetOwn.add_numerical_dataset('pedom_custom.csv', 'timestamps', ['steps', 'distance'], 'avg', 'pedom_phone_')

コード例 #4

0

ファイルを表示

ファイル: crowdsignals_ch2-own.py プロジェクト: raki123/mlqs

    # We add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch
    # and aggregate the values per timestep by averaging the values/
    DataSet.add_numerical_dataset('accelerometer-kx023.csv_out.csv',
                                  'timestamp', ['x', 'y', 'z'], 'avg',
                                  'acc_phone_')
    print("first set")
    # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch
    # and aggregate the values per timestep by averaging the values/
    DataSet.add_numerical_dataset('orientation.csv_out.csv', 'timestamp',
                                  ['x', 'y', 'z'], 'avg', 'gyr_phone_')
    print("second set")
    # We add the labels provided by the users. These are categorical events that might overlap. We add them
    # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it
    # occurs within an interval).
    DataSet.add_event_dataset('status.csv', 'timestampBeg', 'timestampEnd',
                              'label', 'binary')

    # We add the amount of light sensed by the phone (continuous numerical measurements) and aggregate by averaging again
    DataSet.add_numerical_dataset('light-bh1745.csv_out.csv', 'timestamp',
                                  ['lux'], 'avg', 'light_phone_')
    print("third set")
    # We add the magnetometer data (continuous numerical measurements) of the phone and the smartwatch
    # and aggregate the values per timestep by averaging the values
    DataSet.add_numerical_dataset('mag-akm09911.csv_out.csv', 'timestamp',
                                  ['x', 'y', 'z'], 'avg', 'mag_phone_')
    print("fourth set")
    # We add the pressure sensed by the phone (continuous numerical measurements) and aggregate by averaging again

    # Get the resulting pandas data table

    dataset = DataSet.data_table

コード例 #5

0

ファイルを表示

ファイル: crowdsignals_ch2.py プロジェクト: Mick-IJzer/ML4QS

    #dataset.add_numerical_dataset('accelerometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_watch_')

    # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch
    # and aggregate the values per timestep by averaging the values
    dataset.add_numerical_dataset('heart_rate.csv', 'time', ['heartrate'],
                                  'avg', '')
    dataset.add_numerical_dataset('steps.csv', 'time', ['steps'], 'avg', '')
    #dataset.add_numerical_dataset('labels.csv', 'time', ['label'], 'avg', '')

    # We add the heart rate (continuous numerical measurements) and aggregate by averaging again
    #dataset.add_numerical_dataset('heart_rate_smartwatch.csv', 'timestamps', ['rate'], 'avg', 'hr_watch_')

    # We add the labels provided by the users. These are categorical events that might overlap. We add them
    # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it
    # occurs within an interval).
    dataset.add_event_dataset('labels.csv', 'time', 'label', 'sum')

    # We add the amount of light sensed by the phone (continuous numerical measurements) and aggregate by averaging
    #dataset.add_numerical_dataset('light_phone.csv', 'timestamps', ['lux'], 'avg', 'light_phone_')

    # We add the magnetometer data (continuous numerical measurements) of the phone and the smartwatch
    # and aggregate the values per timestep by averaging the values
    #dataset.add_numerical_dataset('magnetometer_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'mag_phone_')
    #dataset.add_numerical_dataset('magnetometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'mag_watch_')

    # We add the pressure sensed by the phone (continuous numerical measurements) and aggregate by averaging again
    #dataset.add_numerical_dataset('pressure_phone.csv', 'timestamps', ['pressure'], 'avg', 'press_phone_')

    # Get the resulting pandas data table
    dataset = dataset.data_table
    #dataset = dataset[dataset['heartrate'].notnull()]

コード例 #6

0

ファイルを表示

]

datasets = []
for milliseconds_per_instance in GRANULARITIES:
    print(
        f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.'
    )

    # Create an initial dataset object with the base directory for our data and a granularity
    dataset = CreateDataset(DATASET_PATH, milliseconds_per_instance)

    # Add the selected measurements to it.

    dataset.add_numerical_dataset('accelerometer.csv', 'timestamp',
                                  ['x', 'y', 'z'], 'avg', 'acc_phone_', True)
    dataset.add_event_dataset('labels.csv', 'label_start', 'label_end',
                              'label', 'binary', True)
    dataset.add_numerical_dataset('gyroscope.csv', 'timestamp',
                                  ['x', 'y', 'z'], 'avg', 'gyr_phone_', True)
    dataset.add_numerical_dataset('barometer.csv', 'timestamp', ['x'], 'avg',
                                  'bar_phone_', True)
    dataset.add_numerical_dataset('linear_accelerometer.csv', 'timestamp',
                                  ['x', 'y', 'z'], 'avg', 'lin_acc_phone_',
                                  True)
    # dataset.add_numerical_dataset('location.csv', 'timestamp', ['latitude','longitude','height', 'velocity', 'direction', 'horizontal_accuracy', 'vertical_accuracy'], 'avg', 'loc_phone_', True)
    dataset.add_numerical_dataset('magnetometer.csv', 'timestamp',
                                  ['x', 'y', 'z'], 'avg', 'mag_phone_', True)
    dataset.add_numerical_dataset('proximity.csv', 'timestamp', ['distance'],
                                  'avg', 'prox_phone_', True)

    # Get the resulting pandas data table
    dataset = dataset.data_table