Example #1
0
def main():
    """Add frequency-domain features to the chapter 3 dataset and plot them.

    Reads ``chapter3_result_final.csv`` from ``./intermediate_datafiles/``,
    applies a Fourier transformation over a window of 10 seconds (first on
    ``acc_phone_x`` only, to plot its spectral features, then on all periodic
    sensor columns), keeps only every ``skip_points``-th row to limit the
    overlap between windows, and plots the resulting dataset.

    Raises:
        IOError: if the input CSV cannot be read.
    """
    DataViz = VisualizeDataset()

    dataset_path = './intermediate_datafiles/'
    try:
        dataset = pd.read_csv(dataset_path + 'chapter3_result_final.csv',
                              index_col=0)
    except IOError:
        print(
            'File not found, try to run previous crowdsignals scripts first!')
        raise

    # Index.to_datetime() has been removed from pandas; pd.to_datetime()
    # performs the same conversion.
    dataset.index = pd.to_datetime(dataset.index)

    # Divide the full interval by one millisecond. The `.microseconds`
    # attribute only holds the sub-second component, so it would be 0 for a
    # sampling step of whole seconds and break the divisions below.
    milliseconds_per_instance = (
        (dataset.index[1] - dataset.index[0]) / pd.Timedelta(milliseconds=1))

    # Now we move to the frequency domain.
    FreqAbs = FourierTransformation()
    fs = float(1000) / milliseconds_per_instance
    # Number of instances that make up 10 seconds of data.
    freq_window_size = int(float(10000) / milliseconds_per_instance)

    periodic_predictor_cols = [
        'acc_phone_x', 'acc_phone_y', 'acc_phone_z', 'acc_watch_x',
        'acc_watch_y', 'acc_watch_z', 'gyr_phone_x', 'gyr_phone_y',
        'gyr_phone_z', 'gyr_watch_x', 'gyr_watch_y', 'gyr_watch_z',
        'mag_phone_x', 'mag_phone_y', 'mag_phone_z', 'mag_watch_x',
        'mag_watch_y', 'mag_watch_z'
    ]

    # Spectral analysis on a copy, so the exploratory columns do not end up
    # in the dataset that is processed further below.
    data_table = FreqAbs.abstract_frequency(
        copy.deepcopy(dataset), ['acc_phone_x'], freq_window_size, fs)

    DataViz.plot_dataset(data_table, [
        'acc_phone_x_max_freq', 'acc_phone_x_freq_weighted', 'acc_phone_x_pse',
        'label'
    ], ['like', 'like', 'like', 'like'], ['line', 'line', 'line', 'points'])

    dataset = FreqAbs.abstract_frequency(
        dataset, periodic_predictor_cols, freq_window_size, fs)

    # Only allow a certain percentage of overlap between windows, otherwise
    # the training examples will be too much alike.
    window_overlap = 0.9
    # NOTE(review): `ws` is not assigned anywhere in this function, so it has
    # to be provided at module level - confirm it exists and holds the
    # intended window size.
    # A step of 0 makes the slice below raise, so never go below 1.
    skip_points = max(1, int((1 - window_overlap) * ws))
    dataset = dataset.iloc[::skip_points, :]

    DataViz.plot_dataset(
        dataset, [
            'acc_phone_x', 'gyr_phone_x', 'hr_watch_rate', 'light_phone_lux',
            'mag_phone_x', 'press_phone_', 'pca_1', 'label'
        ], ['like', 'like', 'like', 'like', 'like', 'like', 'like', 'like'],
        ['line', 'line', 'line', 'line', 'line', 'line', 'line', 'points'])
Example #2
0
# dataset = NumAbs.abstract_numerical(dataset, periodic_predictor_cols, ws, 'std')

print('temporal', dataset.shape)
print('attributes frequency domain')

# Frequency-domain features: the sampling rate in Hz and a window that spans
# 10 seconds worth of instances.
FreqAbs = FourierTransformation()
fs = 1000.0 / milliseconds_per_instance
freq_window_size = int(10000.0 / milliseconds_per_instance)

print('frequency', dataset.shape)

# Spectral analysis over all periodic predictor columns.
dataset = FreqAbs.abstract_frequency(dataset, periodic_predictor_cols,
                                     freq_window_size, fs)

print('frequency all col', dataset.shape)
# For every column, print the per-column non-null counts of the rows in which
# that column is missing.
for col in dataset.columns:
    rows_missing_col = dataset[col].isna()
    print(col, dataset[rows_missing_col].count())

# The overlap-based thinning of the windows is commented out in this script,
# so all rows are written to the output file.
# pickle.dump(dataset, open('concat_no_skipping.pkl', 'wb'))
# The percentage of overlap we allow
# window_overlap = 0.9
# skip_points = int((1-window_overlap) * ws)
# dataset = dataset.iloc[::skip_points,:]
dataset.to_csv(features_watch_data)
print(dataset.shape)
# Categorical abstraction of the label columns over a window of 5 minutes
# worth of instances.
CatAbs = CategoricalAbstraction()
categorical_window_size = int(300000.0 / milliseconds_per_instance)
dataset = CatAbs.abstract_categorical(dataset, ['label'], ['like'], 0.03,
                                      categorical_window_size, 2)

# Frequency domain: sampling rate in Hz and a window of 10 seconds worth of
# instances.
FreqAbs = FourierTransformation()
fs = 1000.0 / milliseconds_per_instance
freq_window_size = int(10000.0 / milliseconds_per_instance)

periodic_predictor_cols = [
    'gravity.x',
    'gravity.y',
    'gravity.z',
    'userAcceleration.x',
    'userAcceleration.y',
    'userAcceleration.z',
]

# Spectral analysis of a single column on a copy, used for plotting only.
data_table = FreqAbs.abstract_frequency(copy.deepcopy(dataset),
                                        ['userAcceleration.x'],
                                        freq_window_size, fs)

spectral_columns = [
    'userAcceleration.x_max_freq',
    'userAcceleration.x_freq_weighted',
    'userAcceleration.x_pse',
    'label',
]
DataViz.plot_dataset(data_table, spectral_columns, ['like'] * 4,
                     ['line'] * 3 + ['points'])

# Frequency features for all periodic predictor columns.
dataset = FreqAbs.abstract_frequency(dataset, periodic_predictor_cols,
                                     freq_window_size, fs)

# Now we only take a certain percentage of overlap in the windows, otherwise our training examples will be too much alike.
Example #4
0
# Synthetic measurement sequence: the sum of a 0.2 Hz sine wave and a shifted
# 0.25 Hz sine wave with a constant offset of 5, sampled every 1/fs on
# [0, 16.1).
df = pd.DataFrame(np.arange(0, 16.1, float(1) / fs), columns=list('X'))
c1 = 3 * np.sin(2 * math.pi * 0.2 * df['X'])
c2 = 2 * np.sin(2 * math.pi * 0.25 * (df['X'] - 2)) + 5
df['Y'] = c1 + c2

# pyplot.hold() was removed in matplotlib 3.0; successive plot calls draw on
# the current axes by default, so no replacement call is needed.
plot.plot(df['X'], df['Y'], 'b-')
plot.legend(['$example$ $measurement$ $sequence$'], loc=3, fontsize='small')
plot.xlabel('time')
plot.ylabel('$X_{1}$')
plot.show()

# Figure 4.2

FreqAbs = FourierTransformation()
data_table = FreqAbs.abstract_frequency(copy.deepcopy(df), ['Y'], 160, fs)

# Collect the frequency (parsed from the column name) and the values of every
# column whose name contains 'freq_<number>_Hz'.
# The capture group extracts the number directly; the previous slice
# `[5:len(val) - 4]` used the length of the match list instead of the string
# and only worked because that list always had exactly one element.
freq_pattern = re.compile(r'freq_(\d+\.\d+)_Hz')
frequencies = []
values = []
for col in data_table.columns:
    match = freq_pattern.search(col)
    if match:
        frequencies.append(float(match.group(1)))
        # DataFrame.ix has been removed from pandas; .loc is the label-based
        # equivalent for this lookup.
        values.append(data_table.loc[data_table.index, col])

fig = plot.figure()
ax1 = fig.add_subplot(111)
plot.xlim([0, 5])
ax1.plot(frequencies, values, 'b+')
Example #5
0
periodic_predictor_cols = [
    'acc_phone_X', 'acc_phone_Y', 'acc_phone_Z',
    'gyr_phone_X', 'gyr_phone_Y', 'gyr_phone_Z',
    'mag_phone_X', 'mag_phone_Y', 'mag_phone_Z',
]

# The single-column spectral analysis and its plot are disabled in this
# script:
# data_table = FreqAbs.abstract_frequency(copy.deepcopy(dataset), ['acc_phone_Y'],
#                                         int(float(4000) / milliseconds_per_instance), fs)
# DataViz.plot_dataset(data_table, ['acc_phone_Y_max_freq', 'acc_phone_Y_freq_weighted', 'acc_phone_Y_pse', 'label'],
#                      ['like', 'like', 'like', 'like'], ['line', 'line', 'line', 'points'])

# Frequency features over a window of 4 seconds worth of instances.
ws_freq = int(4000.0 / milliseconds_per_instance)
dataset = FreqAbs.abstract_frequency(dataset, periodic_predictor_cols,
                                     ws_freq, fs)

# Only allow a certain percentage of overlap between windows, otherwise the
# training examples will be too much alike. The window used for thinning is
# the same 4 seconds worth of instances.
ws = ws_freq
window_overlap = 0.9
skip_points = int((1 - window_overlap) * ws)
# Keep every skip_points-th row (all columns).
dataset = dataset.iloc[::skip_points]

result_path = DATA_PATH / RESULT_FNAME
dataset.to_csv(result_path)

DataViz.plot_dataset(
    dataset, ['acc_phone_X', 'gyr_phone_X', 'mag_phone_X', 'pca_1', 'label'],
Example #6
0
categorical_abstraction_result_file = 'chapter4_categorical_result.csv'
task = 'frequency_plot'

dataset = pd.read_csv(DATA_PATH / DATASET_FNAME, index_col=0)
# Convert the whole index in one vectorised call. Building it from a dict
# keyed by the index labels parses every label separately and collapses
# duplicate labels, which makes the assignment fail with a length mismatch.
dataset.index = pd.to_datetime(dataset.index)
DataViz = VisualizeDataset(__file__, show=False)
# Divide the full interval by one millisecond. The `.microseconds` attribute
# only holds the sub-second component, so it would be 0 for a sampling step
# of whole seconds and break the divisions below.
milliseconds_per_instance = (
    (dataset.index[1] - dataset.index[0]) / pd.Timedelta(milliseconds=1))

if task == 'frequency_plot':

    fs = float(1000) / milliseconds_per_instance
    FreqAbs = FourierTransformation()
    # Restrict the analysis to the rows where 'labelOnTable' equals 1.
    data_table = dataset[dataset['labelOnTable'] == 1]
    data_table = FreqAbs.abstract_frequency(
        copy.deepcopy(data_table), ['acc_phone_x'],
        int(float(10000) / milliseconds_per_instance), fs)

    # Collect the frequency (parsed from the column name) and the absolute
    # values of every column whose name contains 'freq_<number>_Hz'.
    # The capture group extracts the number directly; the previous slice
    # `[5:len(val) - 4]` used the length of the match list instead of the
    # string and only worked because that list always had one element.
    freq_pattern = re.compile(r'freq_(\d+\.\d+)_Hz')
    frequencies = []
    values = []
    for col in data_table.columns:
        match = freq_pattern.search(col)
        if match:
            frequencies.append(float(match.group(1)))
            values.append(abs(data_table.loc[data_table.index,
                                             col]))  # absolute amplitude

    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    plt.xlim([0, 2])
    ax1.plot(frequencies, values, 'b+')
Example #7
0
def main():
    """Run the feature-abstraction step selected by ``FLAGS.mode``.

    Modes:
        * ``'time'``: aggregate ``acc_phone_x`` (mean and std) over windows
          of 5 seconds, 30 seconds and 5 minutes and plot the result.
        * ``'frequency'``: Fourier-transform ``acc_phone_x`` over a window of
          10 seconds and plot the spectral features.
        * ``'final'``: add time-domain, categorical and frequency-domain
          features, thin the rows according to ``FLAGS.overlap``, plot the
          dataset and write it to ``DATA_PATH / RESULT_FNAME``.

    Any other mode falls through without producing plots or files.

    Raises:
        IOError: if the input dataset cannot be read.
    """
    # Read the result from the previous chapter and convert the index to
    # datetime.
    try:
        dataset = pd.read_csv(DATA_PATH / DATASET_FNAME, index_col=0)
        dataset.index = pd.to_datetime(dataset.index)
    except IOError:
        print(
            'File not found, try to run previous crowdsignals scripts first!')
        raise

    # Create an instance of the visualization class to plot the results.
    DataViz = VisualizeDataset(__file__)

    # Number of milliseconds covered by an instance, based on the first two
    # rows. Divide the full interval by one millisecond: the `.microseconds`
    # attribute only holds the sub-second component, so it would be 0 for a
    # sampling step of whole seconds and break the divisions below.
    milliseconds_per_instance = (
        (dataset.index[1] - dataset.index[0]) / pd.Timedelta(milliseconds=1))

    # Create objects for feature abstraction.
    NumAbs = NumericalAbstraction()
    CatAbs = CategoricalAbstraction()
    FreqAbs = FourierTransformation()

    def plot_overview(table):
        # Plot the overview of sensor columns used twice in 'final' mode;
        # the argument lists are rebuilt on every call.
        DataViz.plot_dataset(data_table=table,
                             columns=[
                                 'acc_phone_x', 'gyr_phone_x', 'hr_watch_rate',
                                 'light_phone_lux', 'mag_phone_x',
                                 'press_phone_', 'pca_1', 'label'
                             ],
                             match=[
                                 'like', 'like', 'like', 'like', 'like',
                                 'like', 'like', 'like'
                             ],
                             display=[
                                 'line', 'line', 'line', 'line', 'line',
                                 'line', 'line', 'points'
                             ])

    if FLAGS.mode == 'time':
        # Focus on the time domain first.
        # Window sizes are the number of instances representing 5 seconds,
        # 30 seconds and 5 minutes.
        window_sizes = [
            int(float(5000) / milliseconds_per_instance),
            int(float(0.5 * 60000) / milliseconds_per_instance),
            int(float(5 * 60000) / milliseconds_per_instance)
        ]

        # Work on a copy so the loaded dataset stays untouched.
        dataset_copy = copy.deepcopy(dataset)
        for ws in window_sizes:
            print(
                f'Abstracting numerical features for window size {ws * milliseconds_per_instance / 1000}s.'
            )
            for aggregation in ('mean', 'std'):
                dataset_copy = NumAbs.abstract_numerical(
                    data_table=dataset_copy,
                    cols=['acc_phone_x'],
                    window_size=ws,
                    aggregation_function=aggregation)

        DataViz.plot_dataset(data_table=dataset_copy,
                             columns=[
                                 'acc_phone_x', 'acc_phone_x_temp_mean',
                                 'acc_phone_x_temp_std', 'label'
                             ],
                             match=['exact', 'like', 'like', 'like'],
                             display=['line', 'line', 'line', 'points'])

    elif FLAGS.mode == 'frequency':
        # Move to the frequency domain: sampling rate in Hz and a window of
        # 10 seconds worth of instances.
        fs = 1000.0 / milliseconds_per_instance
        ws = int(10000.0 / milliseconds_per_instance)

        data_table = FreqAbs.abstract_frequency(
            data_table=copy.deepcopy(dataset),
            cols=['acc_phone_x'],
            window_size=ws,
            sampling_rate=fs)
        # Spectral analysis
        DataViz.plot_dataset(data_table=data_table,
                             columns=[
                                 'acc_phone_x_max_freq',
                                 'acc_phone_x_freq_weighted',
                                 'acc_phone_x_pse', 'label'
                             ],
                             match=['like', 'like', 'like', 'like'],
                             display=['line', 'line', 'line', 'points'])

    elif FLAGS.mode == 'final':
        # Window of 30 seconds worth of instances and sampling rate in Hz.
        ws = int(float(0.5 * 60000) / milliseconds_per_instance)
        fs = 1000.0 / milliseconds_per_instance

        # Abstract time domain features for every column whose name does not
        # contain 'label', then plot the result.
        selected_predictor_cols = [
            c for c in dataset.columns if 'label' not in c
        ]
        print('Calculating mean and std for selected predictor cols.')
        for aggregation in ('mean', 'std'):
            dataset = NumAbs.abstract_numerical(
                data_table=dataset,
                cols=selected_predictor_cols,
                window_size=ws,
                aggregation_function=aggregation)

        plot_overview(dataset)

        # Abstract categorical features over a window of 5 minutes worth of
        # instances.
        print('Abstracting categorical features.')
        dataset = CatAbs.abstract_categorical(
            data_table=dataset,
            cols=['label'],
            match=['like'],
            min_support=0.03,
            window_size=int(float(5 * 60000) / milliseconds_per_instance),
            max_pattern_size=2)

        # Abstract frequency domain features
        periodic_predictor_cols = [
            'acc_phone_x', 'acc_phone_y', 'acc_phone_z', 'acc_watch_x',
            'acc_watch_y', 'acc_watch_z', 'gyr_phone_x', 'gyr_phone_y',
            'gyr_phone_z', 'gyr_watch_x', 'gyr_watch_y', 'gyr_watch_z',
            'mag_phone_x', 'mag_phone_y', 'mag_phone_z', 'mag_watch_x',
            'mag_watch_y', 'mag_watch_z'
        ]

        print('Abstracting frequency features.')
        dataset = FreqAbs.abstract_frequency(data_table=dataset,
                                             cols=periodic_predictor_cols,
                                             window_size=ws,
                                             sampling_rate=fs)

        # Only allow a certain percentage of overlap between windows,
        # otherwise the training examples will be too much alike.
        window_overlap = FLAGS.overlap
        # An overlap at or near 1 would give a step of 0 and make the slice
        # below raise, so never go below 1 (keep every row).
        skip_points = max(1, int((1 - window_overlap) * ws))
        dataset = dataset.iloc[::skip_points, :]

        # Plot the final dataset
        plot_overview(dataset)

        # Store the generated dataset
        dataset.to_csv(DATA_PATH / RESULT_FNAME)
Example #8
0
    dataset_cs, ['label'], ['like'], 0.03,
    int(float(5 * 60000) / milliseconds_per_instance), 2)

# Frequency domain: sampling rate in Hz and a window of 10 seconds worth of
# instances.
FreqAbs = FourierTransformation()
fs = 1000.0 / milliseconds_per_instance
freq_window_size = int(10000.0 / milliseconds_per_instance)

periodic_predictor_cols = [
    'acc_phone_x', 'acc_phone_y', 'acc_phone_z',
    'acc_watch_x', 'acc_watch_y', 'acc_watch_z',
    'gyr_phone_x', 'gyr_phone_y', 'gyr_phone_z',
    'gyr_watch_x', 'gyr_watch_y', 'gyr_watch_z',
    'mag_phone_x', 'mag_phone_y', 'mag_phone_z',
    'mag_watch_x', 'mag_watch_y', 'mag_watch_z',
]

# Spectral analysis of a single column on a copy, used for plotting only.
data_table = FreqAbs.abstract_frequency(copy.deepcopy(dataset_cs),
                                        ['acc_phone_x'], freq_window_size, fs)

spectral_columns = [
    'acc_phone_x_max_freq',
    'acc_phone_x_freq_weighted',
    'acc_phone_x_pse',
    'label',
]
DataViz.plot_dataset(data_table, spectral_columns, ['like'] * 4,
                     ['line'] * 3 + ['points'])

# Frequency features for all periodic predictor columns.
dataset = FreqAbs.abstract_frequency(dataset_cs, periodic_predictor_cols,
                                     freq_window_size, fs)

# Now we only take a certain percentage of overlap in the windows, otherwise our training examples will be too much alike.
Example #9
0
    dataset, ['label'], ['like'], 0.03,
    int(float(5 * 60000) / milliseconds_per_instance), 2)

# Frequency domain: sampling rate in Hz and a window of 10 seconds worth of
# instances.
FreqAbs = FourierTransformation()
fs = 1000.0 / milliseconds_per_instance
freq_window_size = int(10000.0 / milliseconds_per_instance)

periodic_predictor_cols = [
    'acc_X (m/s^2)', 'acc_Y (m/s^2)', 'acc_Z (m/s^2)',
    'gyr_X (rad/s)', 'gyr_Y (rad/s)', 'gyr_Z (rad/s)',
    'mag_X (muT)', 'mag_Y (muT)', 'mag_Z (muT)',
]

# Spectral analysis of a single column on a copy, used for plotting only.
data_table = FreqAbs.abstract_frequency(copy.deepcopy(dataset),
                                        ['acc_X (m/s^2)'], freq_window_size,
                                        fs)

spectral_columns = [
    'acc_X (m/s^2)_max_freq',
    'acc_X (m/s^2)_freq_weighted',
    'acc_X (m/s^2)_pse',
    'label',
]
DataViz.plot_dataset(data_table, spectral_columns, ['like'] * 4,
                     ['line'] * 3 + ['points'])

# Frequency features for all periodic predictor columns.
dataset = FreqAbs.abstract_frequency(dataset, periodic_predictor_cols,
                                     freq_window_size, fs)

# Now we only take a certain percentage of overlap in the windows, otherwise our training examples will be too much alike.