예제 #1
0
    os.makedirs(result_dataset_path)

# Chapter 2: Initial exploration of the dataset.

# Set a granularity (i.e. how big are our discrete time steps). We start very
# coarse grained, namely one measurement per minute, and secondly use four measurements
# per second

# Two aggregation step sizes: one instance per minute (60000 ms) and four
# instances per second (250 ms).
granularities = [60000, 250]
datasets_own = []  # result tables for our own recordings, one per granularity
datasets_cs = []  # result tables for the crowdsignals data (its loaders are commented out below)

for milliseconds_per_instance in granularities:

    # Create an initial dataset object with the base directory for our data and a granularity.
    # (CreateDataset and dataset_path_own are defined earlier in the file — not visible here.)
    DataSetOwn = CreateDataset(dataset_path_own, milliseconds_per_instance)
#    DataSetCS = CreateDataset(dataset_path_cs, milliseconds_per_instance)

    # Add the selected measurements to it.

    # We add the accelerometer data (continuous numerical measurements) of the phone
    # and aggregate the values per timestep by averaging the values.
    DataSetOwn.add_numerical_dataset('acc_custom.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_phone_')
#    DataSetCS.add_numerical_dataset('accelerometer_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_phone_')
#    DataSetCS.add_numerical_dataset('accelerometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_watch_')

    # We add the gyroscope data (continuous numerical measurements) of the phone
    # and aggregate the values per timestep by averaging the values.
    DataSetOwn.add_numerical_dataset('gyro_custom.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_phone_')
#    DataSetCS.add_numerical_dataset('gyroscope_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_phone_')
#    DataSetCS.add_numerical_dataset('gyroscope_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_watch_')
예제 #2
0
def main():
    """Build, visualize and store the chapter-2 sensor datasets.

    For each granularity, aggregates the raw CSV files in DATASET_PATH into a
    single pandas table, plots boxplots and the complete dataset, prints
    summary statistics, and (optionally) writes each version to RESULT_PATH.
    Finally, the last (finest-grained, 250 ms) table is written to
    RESULT_PATH / RESULT_FNAME.
    """
    # Set a granularity (the discrete step size of our time series data) and choose if all resulting datasets should
    # be saved. A coarse-grained granularity of one instance per minute, and a fine-grained one with four instances
    # per second are used.
    GRANULARITIES = [60000, 250]
    SAVE_VERSIONS = False

    # Make any required directories if they don't already exist (a plain loop
    # rather than a side-effect-only list comprehension).
    for required_path in (DATASET_PATH, RESULT_PATH):
        required_path.mkdir(exist_ok=True, parents=True)

    # Create object to visualize the data and save figures
    DataViz = VisualizeDataset(module_path=__file__)

    datasets = []
    for milliseconds_per_instance in GRANULARITIES:
        print(
            f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.')

        # Create an initial dataset object with the base directory for our data and a granularity and add selected
        # measurements to it
        data_engineer = CreateDataset(base_dir=DATASET_PATH, granularity=milliseconds_per_instance)

        # Add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='accelerometer_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='acc_phone_')
        data_engineer.add_numerical_dataset(file='accelerometer_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='acc_watch_')

        # Add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='gyroscope_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='gyr_phone_')
        data_engineer.add_numerical_dataset(file='gyroscope_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='gyr_watch_')

        # Add the heart rate (continuous numerical measurements) and aggregate by averaging the values
        data_engineer.add_numerical_dataset(file='heart_rate_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['rate'], aggregation='avg', prefix='hr_watch_')

        # Add the labels provided by the users as binary attributes (i.e. add a one to the attribute representing the
        # specific value for a label if it occurs within an interval). These are categorical events that might overlap.
        data_engineer.add_event_dataset(file='labels.csv', start_timestamp_col='label_start',
                                        end_timestamp_col='label_end',
                                        value_col='label', aggregation='binary')

        # Add the amount of light sensed by the phone (continuous numerical measurements) and aggregate by averaging
        data_engineer.add_numerical_dataset(file='light_phone.csv', timestamp_col='timestamps', value_cols=['lux'],
                                            aggregation='avg', prefix='light_phone_')

        # Add the magnetometer data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='magnetometer_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='mag_phone_')
        data_engineer.add_numerical_dataset(file='magnetometer_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='mag_watch_')

        # Add the pressure sensed by the phone (continuous numerical measurements) and aggregate by averaging again
        data_engineer.add_numerical_dataset(file='pressure_phone.csv', timestamp_col='timestamps',
                                            value_cols=['pressure'],
                                            aggregation='avg', prefix='press_phone_')

        # Get the resulting pandas data table
        dataset = data_engineer.data_table

        # Create boxplots
        DataViz.plot_dataset_boxplot(dataset=dataset, cols=['acc_phone_x', 'acc_phone_y', 'acc_phone_z', 'acc_watch_x',
                                                            'acc_watch_y', 'acc_watch_z'])

        # Plot all data. BUG FIX: columns, match and display must have the same
        # length (one entry per column group); the original passed 8 match and
        # 8 display entries for 7 column groups.
        DataViz.plot_dataset(data_table=dataset,
                             columns=['acc_', 'gyr_', 'hr_watch_rate', 'light_phone_lux', 'mag_', 'press_phone_',
                                      'label'],
                             match=['like', 'like', 'like', 'like', 'like', 'like', 'like'],
                             display=['line', 'line', 'line', 'line', 'line', 'line', 'points'])

        # Print a summary of the dataset
        util.print_statistics(dataset=dataset)
        # Keep an independent copy so later granularities don't alias earlier tables.
        datasets.append(copy.deepcopy(dataset))

        # Save the various versions of the created datasets with logical filenames if needed.
        # FIX: include the .csv extension, consistent with RESULT_FNAME.
        if SAVE_VERSIONS:
            dataset.to_csv(RESULT_PATH / f'chapter2_result_{milliseconds_per_instance}.csv')

    # Make a table like the one shown in the book, comparing the two datasets produced
    # (requires at least two entries in GRANULARITIES).
    util.print_latex_table_statistics_two_datasets(dataset1=datasets[0], dataset2=datasets[1])

    # Finally, store the last dataset we generated (250 ms)
    dataset.to_csv(RESULT_PATH / RESULT_FNAME)
예제 #3
0
from Chapter2.CreateDataset import CreateDataset
from util.VisualizeDataset import VisualizeDataset
from util import util
from pathlib import Path
import copy
import os
import sys

DATASET_PATH = './datasets/crowdsignals/csv-participant-one/'
RESULT_PATH = './intermediate_datafiles/'
RESULT_FNAME = sys.argv[2] if len(sys.argv) > 2 else 'chapter2_result.csv'

# Build one combined sensor table at a granularity of 250 ms per instance.
dataset = CreateDataset(DATASET_PATH, 250)

# Accelerometer and gyroscope x/y/z from both phone and smartwatch,
# averaged within each time step.
for sensor_file, column_prefix in (
        ('accelerometer_phone.csv', 'acc_phone_'),
        ('accelerometer_smartwatch.csv', 'acc_watch_'),
        ('gyroscope_phone.csv', 'gyr_phone_'),
        ('gyroscope_smartwatch.csv', 'gyr_watch_')):
    dataset.add_numerical_dataset(sensor_file, 'timestamps', ['x', 'y', 'z'],
                                  'avg', column_prefix, None)

# User-provided labels become binary columns; a label spans all instances
# inside its start/end interval.
dataset.add_event_dataset('labels.csv', 'label_start', 'label_end', 'label',
                          'binary')

dataset = dataset.data_table

# Per-activity slices of the combined table.
dataset_walking = dataset[dataset['labelWalking'] == 1]
dataset_sitting = dataset[dataset['labelSitting'] == 1]
dataset_running = dataset[dataset['labelRunning'] == 1]
예제 #4
0
RESULT_PATH = Path('./intermediate_datafiles/our_data/')
RESULT_FNAME = sys.argv[2] if len(sys.argv) > 2 else 'chapter2_result.csv'

# Set a granularity (the discrete step size of our time series data). Two step
# sizes are used here: 500 ms and 100 ms per instance.
GRANULARITIES = [500, 100]

# We can call Path.mkdir(exist_ok=True) to make any required directories if they don't already exist.
[path.mkdir(exist_ok=True, parents=True) for path in [DATASET_PATH, RESULT_PATH]]

datasets = []
for milliseconds_per_instance in GRANULARITIES:
    print(f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.')

    # Create an initial dataset object with the base directory for our data and a granularity.
    # (CreateDataset and DATASET_PATH are defined earlier in the file — not visible here.)
    dataset = CreateDataset(DATASET_PATH, milliseconds_per_instance)

    # Add the selected measurements to it.

    # The crowdsignals accelerometer loaders are kept commented out for
    # reference; this variant presumably loads its own data further down
    # (continuation not visible in this chunk).
    # dataset.add_numerical_dataset('accelerometer_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_phone_')
    # dataset.add_numerical_dataset('accelerometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_watch_')

    # Gyroscope loaders, likewise commented out for reference.
    # dataset.add_numerical_dataset('gyroscope_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_phone_')
    # dataset.add_numerical_dataset('gyroscope_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_watch_')

    # Heart rate loader, likewise commented out for reference.
    # dataset.add_numerical_dataset('heart_rate_smartwatch.csv', 'timestamps', ['rate'], 'avg', 'hr_watch_')
예제 #5
0
    print('Creating result directory: ' + result_dataset_path)
    os.makedirs(result_dataset_path)

# Chapter 2: Initial exploration of the dataset.

# Set a granularity (i.e. how big are our discrete time steps). We start very
# coarse grained, namely one measurement per minute, and secondly use four measurements
# per second

# Two aggregation step sizes: one instance per minute (60000 ms) and four
# instances per second (250 ms); one dataset is built per step size.
granularities = [60000, 250]
datasets = []

for milliseconds_per_instance in granularities:

    # Create an initial dataset object with the base directory for our data and a granularity.
    # (CreateDataset and dataset_path are defined earlier in the file — not visible here.)
    DataSet = CreateDataset(dataset_path, milliseconds_per_instance)

    # Add the selected measurements to it.

    # Add numerical measurements: x/y/z channels of each motion sensor,
    # averaged within each time step.
    DataSet.add_numerical_dataset('accelerometer.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'acc_')

    DataSet.add_numerical_dataset('linear_acceleration.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'lin_acc_')

    DataSet.add_numerical_dataset('magnetometer.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'mag_')

    # NOTE(review): this file name is capitalised unlike the others — confirm
    # the file on disk really is 'Gyroscope.csv'.
    DataSet.add_numerical_dataset('Gyroscope.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'gyr_')
예제 #6
0
    print('Creating result directory: ' + result_dataset_path)
    os.makedirs(result_dataset_path)

# Chapter 2: Initial exploration of the dataset.

# Set a granularity (i.e. how big are our discrete time steps). We start very
# coarse grained, namely one measurement per minute, and secondly use four measurements
# per second

# A single fine-grained step size of 250 ms per instance.
granularities = [250]
datasets = []

for milliseconds_per_instance in granularities:

    # Create an initial dataset object with the base directory for our data and a granularity.
    # (CreateDataset and dataset_path are defined earlier in the file — not visible here.)
    DataSet = CreateDataset(dataset_path, milliseconds_per_instance)

    # Add the selected measurements to it.

    # Accelerometer x/y/z, averaged per time step. Note the timestamp column
    # here is 'timestamp' (singular), unlike the crowdsignals examples.
    DataSet.add_numerical_dataset('accelerometer-kx023.csv_out.csv',
                                  'timestamp', ['x', 'y', 'z'], 'avg',
                                  'acc_phone_')
    print("first set")  # progress marker
    # NOTE(review): the file is orientation.csv_out.csv but the column prefix
    # says 'gyr_phone_' — confirm this mapping is intended.
    DataSet.add_numerical_dataset('orientation.csv_out.csv', 'timestamp',
                                  ['x', 'y', 'z'], 'avg', 'gyr_phone_')
    print("second set")  # progress marker
    # We add the labels provided by the users. These are categorical events that might overlap. We add them
예제 #7
0
# instance per minute, and a fine-grained one with four instances per second.
# A single fine-grained step size of 30 ms per instance.
milliseconds_per_instance = 30

# We can call Path.mkdir(exist_ok=True) to make any required directories if they don't already exist.
# (List comprehension is used purely for its side effect here.)
[
    path.mkdir(exist_ok=True, parents=True)
    for path in [DATASET_PATH, RESULT_PATH]
]

datasets = []
# One dataset per participant: each sub-directory of DATASET_PATH holds one
# person's recordings.
for personid in os.listdir(DATASET_PATH):
    DATASET_PATH_ = DATASET_PATH / personid
    print(f'Creating numerical datasets from files in {DATASET_PATH_}.')

    # Create an initial dataset object with the base directory for our data and a granularity.
    # (CreateDataset is imported earlier in the file — not visible here.)
    dataset = CreateDataset(DATASET_PATH_, milliseconds_per_instance)

    # Add the selected measurements to it.

    # Accelerometer channels from motion.csv, averaged per time step; the
    # empty prefix keeps the original column names.
    dataset.add_numerical_dataset('motion.csv', 'time',
                                  ['acc_x', 'acc_y', 'acc_z'], 'avg', '')
    #dataset.add_numerical_dataset('accelerometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_watch_')

    # Heart rate and step count, also averaged per time step.
    dataset.add_numerical_dataset('heart_rate.csv', 'time', ['heartrate'],
                                  'avg', '')
    dataset.add_numerical_dataset('steps.csv', 'time', ['steps'], 'avg', '')
    #dataset.add_numerical_dataset('labels.csv', 'time', ['label'], 'avg', '')
예제 #8
0
# Two aggregation step sizes: one instance per second (1000 ms) and one per
# 15 seconds (15000 ms).
GRANULARITIES = [1000, 15000]

# We can call Path.mkdir(exist_ok=True) to make any required directories if they don't already exist.
# (List comprehension is used purely for its side effect here.)
[
    path.mkdir(exist_ok=True, parents=True)
    for path in [DATASET_PATH, RESULT_PATH]
]

datasets = []
for milliseconds_per_instance in GRANULARITIES:
    print(
        f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.'
    )

    # Create an initial dataset object with the base directory for our data and a granularity.
    # (CreateDataset and DATASET_PATH are defined earlier in the file — not visible here.)
    dataset = CreateDataset(DATASET_PATH, milliseconds_per_instance)

    # Add the selected measurements to it.
    # Accelerometer x/y/z, averaged within each time step.
    dataset.add_numerical_dataset('Accelerometer.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'acc_')

    # Gyroscope x/y/z, averaged within each time step.
    dataset.add_numerical_dataset('Gyroscope.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'gyr_')

    # We add the heart rate (continuous numerical measurements) and aggregate by averaging again

    # We add the labels provided by the users. These are categorical events that might overlap. We add them
    # (loop body continues past this chunk)
예제 #9
0
# Column prefix, source file and column names for the linear-acceleration
# sensor; cols_pre5 holds the prefixed column names as they appear in the
# aggregated table.
prefix5 = "linacc_"
file5 = "LinearAcceleration.csv"
cols5 = [
    "Linear Acceleration x (m/s^2)", "Linear Acceleration y (m/s^2)",
    "Linear Acceleration z (m/s^2)"
]
cols_pre5 = [prefix5 + x for x in cols5]

# Same for the heart-rate data.
prefix6 = "hr_"
file6 = "heartdf2.csv"
cols6 = ["Heart Rate"]
# BUG FIX: the prefixed heart-rate names were previously built from cols5
# (the linear-acceleration columns); they must come from cols6.
cols_pre6 = [prefix6 + x for x in cols6]

# Aggregate all sensor files into one table with 500 ms per instance.
# DataSet = CreateDataset(dataset_path, 60000)
DataSet = CreateDataset(dataset_path, 500)

# file1..file4, cols1..cols4 and prefix1..prefix4 are defined earlier in the
# file (not visible in this chunk); all files share the 'Time (s)' column.
DataSet.add_numerical_dataset(file1, 'Time (s)', cols1, 'avg', prefix1)
DataSet.add_numerical_dataset(file2, 'Time (s)', cols2, 'avg', prefix2)
DataSet.add_numerical_dataset(file3, 'Time (s)', cols3, 'avg', prefix3)
DataSet.add_numerical_dataset(file4, 'Time (s)', cols4, 'avg', prefix4)
DataSet.add_numerical_dataset(file5, 'Time (s)', cols5, 'avg', prefix5)
DataSet.add_numerical_dataset(file6, 'Time (s)', cols6, 'avg', prefix6)

# Labels become binary columns spanning their start/end interval.
DataSet.add_event_dataset('labels.csv', 'label_start', 'label_end', 'label',
                          'binary')

dataset = DataSet.data_table

dataset.to_csv(result_dataset_path + "chapter2_final2s.csv")
예제 #10
0
    'Gyroscope.csv': 'gyr_',
    'Linear Acceleration.csv': 'lin_',
    'Location.csv': 'loc_',
    'Magnetometer.csv': 'mag_'
}

# Name of the timestamp column shared by the sensor CSV files.
time_column_name = 'Time (s)'

# Candidate aggregation step sizes in milliseconds.
granularities = [60000, 1000, 250]

# Selects which analysis branch runs below.
task = 'final_plot'

if __name__ == '__main__':
    if task == '1_1':
        # Boxplot of the three gyroscope axes at 250 ms granularity.
        # (sensors and axis_abbreviations are dicts defined earlier in the
        # file — only partially visible in this chunk.)
        sensor = 'Gyroscope.csv'
        dataset = CreateDataset('datasets/Running_2020-06-04_12-40-48/', 250)
        dataset.add_numerical_dataset('Gyroscope.csv', time_column_name,
                                      sensors[sensor], 'avg',
                                      axis_abbreviations[sensor])
        dataset = dataset.data_table
        fig = plt.figure(figsize=(5, 3.5))
        ax = fig.add_subplot(111)
        ax.boxplot([
            dataset['gyr_Gyroscope x (rad/s)'],
            dataset['gyr_Gyroscope y (rad/s)'],
            dataset['gyr_Gyroscope z (rad/s)']
        ],
                   widths=0.6)
        xlabels = ["gyr_x", "gyr_y", 'gyr_z']
        ax.set_xticklabels(xlabels)
        # Clamp the y-axis so outliers don't dominate the plot.
        plt.ylim([-5, 5])
예제 #11
0
    print('Creating result directory: ' + result_dataset_path)
    os.makedirs(result_dataset_path)

# Chapter 2: Initial exploration of the dataset.

# Set a granularity (i.e. how big are our discrete time steps). We start very
# coarse grained, namely one measurement per minute, and secondly use four measurements
# per second

# First fragment: 2000 ms granularity over the crowdsignals phone data.
granularities = [2000]
datasets = []

for milliseconds_per_instance in granularities:

    # Create an initial dataset object with the base directory for our data and a granularity.
    # (CreateDataset and dataset_path are defined earlier in the file — not visible here.)
    DataSet = CreateDataset(dataset_path, milliseconds_per_instance)

    # Add the selected measurements to it.

    # Phone accelerometer x/y/z, averaged per time step.
    DataSet.add_numerical_dataset('accelerometer_phone.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'acc_phone_')

    # Phone gyroscope x/y/z, averaged per time step.
    DataSet.add_numerical_dataset('gyroscope_phone.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'gyr_phone_')

    # We add the heart rate (continuous numerical measurements) and aggregate by averaging again
    #DataSet.add_numerical_dataset('heart_rate_smartwatch.csv', 'timestamps', ['rate'], 'avg', 'hr_watch_')
    # NOTE(review): the two statements below run on *every* loop iteration and
    # os.makedirs raises FileExistsError if the directory already exists —
    # this looks like a second script fragment accidentally spliced into the
    # loop body; confirm against the original sources.
    print('Creating result directory: ' + result_dataset_path)
    os.makedirs(result_dataset_path)

# Chapter 2: Initial exploration of the dataset.

# Second fragment: EMG data at a very fine 8 ms step per instance.

granularities = [8]
datasets = []

for milliseconds_per_instance in granularities:

    # Create an initial dataset object with the base directory for our data and a granularity
    DataSet = CreateDataset(dataset_path, milliseconds_per_instance)

    # Add the selected measurements to it.

    # Eight EMG channels, averaged per time step.
    # NOTE(review): channel 'f' is missing from the list — confirm that is
    # intentional.
    DataSet.add_numerical_dataset('EMG_data.csv', 'timestamps',
                                  ['a', 'b', 'c', 'd', 'e', 'g', 'h', 'i'],
                                  'avg', 'EMG_')
    # DataSet.add_numerical_dataset('accelerometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_watch_')

    # Gyroscope loaders kept commented out for reference.
    # DataSet.add_numerical_dataset('gyroscope_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_phone_')
    # DataSet.add_numerical_dataset('gyroscope_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_watch_')
예제 #13
0
# A single fine-grained step size of 250 ms per instance.
GRANULARITIES = [250]

# We can call Path.mkdir(exist_ok=True) to make any required directories if they don't already exist.
# (List comprehension is used purely for its side effect here.)
[
    path.mkdir(exist_ok=True, parents=True)
    for path in [DATASET_PATH, RESULT_PATH]
]

datasets = []
for milliseconds_per_instance in GRANULARITIES:
    print(
        f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.'
    )

    # Create an initial dataset object with the base directory for our data and a granularity.
    # (CreateDataset and DATASET_PATH are defined earlier in the file — not visible here.)
    dataset = CreateDataset(DATASET_PATH, milliseconds_per_instance)

    # Add the selected measurements to it.
    # NOTE(review): `user` is defined earlier in the file; only the user_2
    # branch is visible here — confirm other users are handled elsewhere.
    if user == 'user_2':
        dataset.add_numerical_dataset('accelerometer_phone.csv', 'timestamps',
                                      ['x', 'y', 'z'], 'avg', 'acc_phone_')
        dataset.add_numerical_dataset('proximity_phone.csv', 'timestamps',
                                      ['distance'], 'avg', 'prox_phone_')
        # Labels become binary columns, one per label value.
        dataset.add_event_dataset('labels.csv', 'label_start', 'label_end',
                                  'label', 'binary')
        dataset = dataset.data_table

        # Plot the data
        DataViz = VisualizeDataset(__file__, user)

        # Boxplot
예제 #14
0
    print('Creating result directory: ' + result_dataset_path)
    os.makedirs(result_dataset_path)

# Chapter 2: Initial exploration of the dataset.

# Set a granularity (i.e. how big are our discrete time steps). We start very
# coarse grained, namely one measurement per minute, and secondly use four measurements
# per second

# Two aggregation step sizes: one instance per minute (60000 ms) and four
# instances per second (250 ms).
granularities = [60000, 250]
datasets = []

for milliseconds_per_instance in granularities:

    # Create an initial dataset object with the base directory for our data and a granularity.
    # (CreateDataset and dataset_path are defined earlier in the file — not visible here.)
    DataSet = CreateDataset(dataset_path, milliseconds_per_instance)

    # Add the selected measurements to it.

    # We add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch
    # and aggregate the values per timestep by averaging the values.
    DataSet.add_numerical_dataset('accelerometer_phone.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'acc_phone_')
    DataSet.add_numerical_dataset('accelerometer_smartwatch.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'acc_watch_')

    # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch
    # and aggregate the values per timestep by averaging the values.
    DataSet.add_numerical_dataset('gyroscope_phone.csv', 'timestamps',
                                  ['x', 'y', 'z'], 'avg', 'gyr_phone_')
    DataSet.add_numerical_dataset('gyroscope_smartwatch.csv', 'timestamps',
예제 #15
0
    RESULT_FNAME = sys.argv[2] if len(sys.argv) > 2 else 'chapter2_result.csv'

    # Set a granularity (the discrete step size of our time series data).
    # A single, very coarse step of one day (86400000 ms) is used here.
    GRANULARITIES = [86400000]

    # We can call Path.mkdir(exist_ok=True) to make any required directories if they don't already exist.
    [path.mkdir(exist_ok=True, parents=True) for path in [DATASET_PATH, RESULT_PATH]]


    datasets = []
    for milliseconds_per_instance in GRANULARITIES:
        print(f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.')

        # Create an initial dataset object with the base directory for our data and a granularity
        dataset = CreateDataset(DATASET_PATH, milliseconds_per_instance)

        # Add the selected measurements to it.
        # if user == 'user_2':
        # Each sensor file below is treated as optional and skipped if loading
        # fails. NOTE(review): the bare `except` clauses swallow *every*
        # exception (including typos such as a wrong column name) — consider
        # narrowing them to the expected exception type.
        try:
            dataset.add_numerical_dataset('activity.csv', 'time', ['value'], 'avg', 'act')
        except:
            pass
        try:
            dataset.add_numerical_dataset('appCat_builtin.csv', 'time', ['value'], 'avg', 'built')
        except:
            pass
        try:
            dataset.add_numerical_dataset('appCat_communication.csv', 'time', ['value'], 'avg', 'comm')
        except:
            pass
예제 #16
0
    print('Creating result directory: ' + result_dataset_path)
    os.makedirs(result_dataset_path)

# Chapter 2: Initial exploration of the dataset.

# Set a granularity (i.e. how big are our discrete time steps). We start very
# coarse grained, namely one measurement per minute, and secondly use four measurements
# per second

# Two aggregation step sizes: one instance per minute (60000 ms) and four
# instances per second (250 ms).
granularities = [60000, 250]
datasets = []

for milliseconds_per_instance in granularities:

    # Create an initial dataset object with the base directory for our data and a granularity.
    # (CreateDataset and dataset_path are defined earlier in the file — not visible here.)
    DataSet = CreateDataset(dataset_path, milliseconds_per_instance)

    # Add the selected measurements to it.

    # Add numerical measurements: all twelve body-sensor channels (left/right
    # ankle, belt and chest, x/y/z each), averaged per time step. The empty
    # prefix keeps the original column names.
    DataSet.add_numerical_dataset('A01_parsed_raw_data.csv', 'timestamp',
                                  ['ankle_l_x', 'ankle_l_y', 'ankle_l_z', 'ankle_r_x', 'ankle_r_y', 'ankle_r_z',
                                   'belt_x', 'belt_y', 'belt_z', 'chest_x', 'chest_y', 'chest_z'], 'avg', '')

    # We add the labels provided by the users. These are categorical events that might overlap. We add them
    # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it
    # occurs within an interval).
    DataSet.add_binary_labels_dataset('A01_parsed_raw_data.csv', 'timestamp',
                                      ['labelWalking', 'labelFalling', 'labelLyingDown', 'labelLying',
                                       'labelSittingDown', 'labelSitting', 'labelStandingFromLying', 'labelOnAllFours',
                                       'labelSittingOnTheGround', 'labelStandingFromSitting',
예제 #17
0
# Absolute path of this script's directory; the raw data lives in
# ..\data\daten-neu relative to it (Windows-style separators).
my_path = os.path.abspath(os.path.dirname(__file__))
# FIX: the original literal "..\\data\daten-neu" relied on the invalid escape
# sequence "\d" (a DeprecationWarning today, a SyntaxError in future Python);
# "\\d" yields the identical path string without the warning.
path = os.path.join(my_path, "..\\data\\daten-neu")

# print os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))

# Each sub-folder of the data directory is one activity label.
folders = os.listdir(path)  # labels

# Aggregation step size in milliseconds.
milliseconds_per_instance = 50
samples_dataframe = pd.DataFrame()
frames = []

for label in folders:
    # Each label folder contains one sub-folder per recorded sample.
    samples = os.listdir(path + "\\" + label)
    for sample in samples:
        sensors = os.listdir(path + "\\" + label + "\\" + sample)
        # One CreateDataset per sample directory (trailing "\\" makes it a base dir).
        dataSet = CreateDataset(path + "\\" + label + "\\" + sample + "\\", milliseconds_per_instance)
        # for sensor in sensors:
        dataSet.add_numerical_dataset("Accelerometer.csv", 'Time (s)', ['X (m/s^2)', 'Y (m/s^2)', 'Z (m/s^2)'], 'avg',
                                      "Accelerometer")
        dataSet.add_numerical_dataset("Gyroscope.csv", 'Time (s)', ['X (rad/s)', 'Y (rad/s)', 'Z (rad/s)'], 'avg',
                                      "Gyroscope")

        # Drop rows where the gyroscope z channel is NaN.
        # NOTE(review): original author asked "todo: useful?" — confirm this
        # filtering is still wanted.
        dataSet.data_table = dataSet.data_table[~(np.isnan(dataSet.data_table['GyroscopeZ (rad/s)']))]  # todo: useful?

        length = len(dataSet.data_table)



        # Keep a fixed window of 52 rows (rows length-53 .. length-2) so every
        # sample has the same length; note the very last row is excluded.
        dataSet.data_table = dataSet.data_table[(length - 53): (length - 1)]  # same length for every sample

        FreqAbs = FourierTransformation()