Esempio n. 1
0
# Load the 'chapter2_result.csv' dataset, using its first column as the index.
original_dataset = pd.read_csv(dataset_path + 'chapter2_result.csv',
                               index_col=0)
# Index.to_datetime() is not available in current pandas releases;
# pd.to_datetime() is the supported way to parse the index into timestamps.
original_dataset.index = pd.to_datetime(original_dataset.index)

# Apply the Kalman filter to acc_phone_x; the plots below read the filtered
# values from the 'acc_phone_x_kalman' column of the returned dataset.
KalFilter = KalmanFilters()
kalman_dataset = KalFilter.apply_kalman_filter(original_dataset, 'acc_phone_x')
DataViz.plot_imputed_values(kalman_dataset, ['original', 'kalman'],
                            'acc_phone_x',
                            kalman_dataset['acc_phone_x_kalman'])
DataViz.plot_dataset(kalman_dataset, ['acc_phone_x', 'acc_phone_x_kalman'],
                     ['exact', 'exact'], ['line', 'line'])

# We ignore the Kalman filter output for now...

# Let us apply a lowpass filter and reduce the importance of the data above
# the cutoff frequency (1.5 Hz).

LowPass = LowPassFilter()

# Determine the sampling frequency: 1000 divided by the number of
# milliseconds per instance.
fs = float(1000) / milliseconds_per_instance
# Cutoff frequency handed to the low-pass filter below.
cutoff = 1.5

# Let us study acc_phone_x. The filter receives a deep copy of the dataset
# rather than the dataset object itself.
new_dataset = LowPass.low_pass_filter(copy.deepcopy(dataset),
                                      'acc_phone_x',
                                      fs,
                                      cutoff,
                                      order=10)
DataViz.plot_dataset(
    new_dataset.ix[int(0.4 *
                       len(new_dataset.index)):int(0.43 *
                                                   len(new_dataset.index)), :],
Esempio n. 2
0
    dataset = MisVal.impute_interpolate(dataset, col)

# Using the result from Chapter 2, let us try the Kalman filter on the userAcceleration.x attribute and study the result.

original_dataset = pd.read_csv(DATA_PATH / ORIG_DATASET_FNAME, index_col=0)
original_dataset.index = pd.to_datetime(original_dataset.index)
KalFilter = KalmanFilters()
kalman_dataset = KalFilter.apply_kalman_filter(original_dataset, 'userAcceleration.x')
# DataViz.plot_imputed_values(kalman_dataset, ['original', 'kalman'], 'userAcceleration.x', kalman_dataset['userAcceleration.x_kalman'])
# DataViz.plot_dataset(kalman_dataset, ['userAcceleration.x', 'userAcceleration.x_kalman'], ['exact','exact'], ['line', 'line'])

# We ignore the Kalman filter output for now...

# Let us apply a lowpass filter and reduce the importance of the data above 1.5 Hz

LowPass = LowPassFilter()

# Determine the sampling frequency: 1000 divided by the number of milliseconds per instance.
fs = float(1000)/milliseconds_per_instance
# Cutoff frequency handed to the low-pass filter below.
cutoff = 1.5

# Let us study userAcceleration.x (the filter receives a deep copy of the dataset):
new_dataset = LowPass.low_pass_filter(copy.deepcopy(dataset), 'userAcceleration.x', fs, cutoff, order=10)
# DataViz.plot_dataset(new_dataset.iloc[int(0.3*len(new_dataset.index)):int(0.5*len(new_dataset.index)), :],
#                      ['userAcceleration.x', 'userAcceleration.x_lowpass'], ['exact','exact'], ['line', 'line'])

# And now let us include all measurements that have a form of periodicity (and filter them):
periodic_measurements = ['attitude.roll','attitude.pitch','attitude.yaw','gravity.x','gravity.y','gravity.z','rotationRate.x','rotationRate.y','rotationRate.z','userAcceleration.x','userAcceleration.y','userAcceleration.z']

# Each call is given the dataset returned by the previous call, so the filter is applied column by column.
for col in periodic_measurements:
    dataset = LowPass.low_pass_filter(dataset, col, fs, cutoff, order=10)
Esempio n. 3
0
plot.show()

# Figure 3.4

# Sample frequency (Hz)
fs = 100

# Create time points from 0 up to (but excluding) 16 in steps of 1/fs,
# stored in a single column named 'X'.
t = pd.DataFrame(np.arange(0, 16, float(1) / fs), columns=list('X'))
# Two sine components: frequency 0.1 with amplitude 3, frequency 1 with
# amplitude 2.
c1 = 3 * np.sin(2 * math.pi * 0.1 * t)
c2 = 2 * np.sin(2 * math.pi * t)
# The former plot.hold(True) call is intentionally gone: pyplot.hold was
# removed in Matplotlib 3.0, and successive plot() calls already draw onto
# the same axes.
plot.plot(t, c1, 'b--')
plot.plot(t, c2, 'b:')
plot.plot(t, c1 + c2, 'b-')

# Low-pass filter the combined signal (cutoff 0.5, order 3) and draw the
# filtered column 'X_lowpass' in red.
LowPass = LowPassFilter()
new_dataset = LowPass.low_pass_filter(c1 + c2,
                                      'X',
                                      fs,
                                      0.5,
                                      order=3,
                                      phase_shift=True)
plot.plot(t, new_dataset['X_lowpass'], 'r-')
# Raw strings keep the LaTeX backslashes without relying on invalid escape
# sequences such as '\c' and '\p'; the label text itself is unchanged.
plot.legend([
    r'$3 \cdot sin(2 \cdot \pi \cdot 0.1 \cdot t))$',
    r'$2 \cdot sin(2 \cdot \pi \cdot t))$', '$combined$',
    '$combined$ $after$ $filter (f_{c}=0.5Hz, n=3)$'
],
            loc=4,
            fontsize='small')
plot.xlabel('time')
Esempio n. 4
0
    dataset = MisVal.impute_interpolate(dataset, col)

# Let us try the Kalman filter on the acc_phone_x attribute and study the result.

# original_dataset = pd.read_csv(dataset_path + 'mydata_chapter2_result.csv', index_col=0)
# original_dataset.index = original_dataset.index.to_datetime()
# KalFilter = KalmanFilters()
# kalman_dataset = KalFilter.apply_kalman_filter(original_dataset, 'acc_phone_x')
# DataViz.plot_imputed_values(kalman_dataset, ['original', 'kalman'], 'acc_phone_x', kalman_dataset['acc_phone_x_kalman'])
# DataViz.plot_dataset(kalman_dataset, ['acc_phone_x', 'acc_phone_x_kalman'], ['exact','exact'], ['line', 'line'])

# We ignore the Kalman filter output for now...

# Let us apply a lowpass filter and reduce the importance of the data above
# the cutoff frequency (1.5 Hz).

LowPass = LowPassFilter()

# Determine the sampling frequency: 1000 divided by the number of
# milliseconds per instance.
fs = float(1000) / milliseconds_per_instance
# Cutoff frequency used by the (commented-out) low-pass filter calls below.
cutoff = 1.5

# # Let us study acc_phone_x:
# new_dataset = LowPass.low_pass_filter(copy.deepcopy(dataset), 'acc_phone_x', fs, cutoff, order=10)
# DataViz.plot_dataset(new_dataset.ix[int(0.4*len(new_dataset.index)):int(0.43*len(new_dataset.index)), :], ['acc_phone_x', 'acc_phone_x_lowpass'], ['exact','exact'], ['line', 'line'])
#
# # Let us study acc_phone_y:
# new_dataset = LowPass.low_pass_filter(copy.deepcopy(dataset), 'acc_phone_y', fs, cutoff, order=10)
# DataViz.plot_dataset(new_dataset.ix[int(0.4*len(new_dataset.index)):int(0.43*len(new_dataset.index)), :], ['acc_phone_y', 'acc_phone_y_lowpass'], ['exact','exact'], ['line', 'line'])
#
# # Let us study acc_phone_z:
# new_dataset = LowPass.low_pass_filter(copy.deepcopy(dataset), 'acc_phone_z', fs, cutoff, order=10)
Esempio n. 5
0
def main():
    """Run the processing step selected by ``FLAGS.mode`` on the dataset.

    The dataset is read from ``DATA_PATH / DATASET_FNAME`` and its index is
    parsed into timestamps. Depending on ``FLAGS.mode`` the function then:

    * ``'imputation'`` -- imputes ``hr_watch_rate`` by mean, median and
      interpolation and plots the three results against the original.
    * ``'kalman'`` -- loads ``DATA_PATH / ORIG_DATASET_FNAME``, applies the
      Kalman filter to ``acc_phone_x`` and plots the outcome.
    * ``'lowpass'`` -- low-pass filters ``acc_phone_x`` with cutoff
      ``FLAGS.cutoff`` and plots a slice of the result.
    * ``'PCA'`` -- interpolates missing values, plots the explained variance
      per principal component, applies PCA with 7 components and plots.
    * ``'final'`` -- interpolates, low-pass filters the periodic
      measurements in place, applies PCA, plots, and writes the result to
      ``DATA_PATH / RESULT_FNAME``.

    Any other mode value only performs the loading and setup steps.

    Raises:
        IOError: re-raised after printing a hint when an input CSV file
            cannot be read.
    """
    # Import the data from the specified location and parse the date index
    try:
        dataset = pd.read_csv(Path(DATA_PATH / DATASET_FNAME), index_col=0)
        dataset.index = pd.to_datetime(dataset.index)
    except IOError:
        print(
            'File not found, try to run previous crowdsignals scripts first!')
        raise

    # Create an instance of our visualization class to plot the results
    DataViz = VisualizeDataset(__file__)

    # Compute the number of milliseconds covered by an instance based on the
    # first two rows. The whole interval is converted to milliseconds:
    # Timedelta.microseconds only holds the sub-second component, so it
    # would yield 0 (and a division by zero below) for a 1 s interval.
    sampling_interval = dataset.index[1] - dataset.index[0]
    milliseconds_per_instance = sampling_interval / pd.Timedelta(
        milliseconds=1)

    # Create objects for value imputation, low pass filter and PCA
    MisVal = ImputationMissingValues()
    LowPass = LowPassFilter()
    PCA = PrincipalComponentAnalysis()

    if FLAGS.mode == 'imputation':
        # Impute the missing values and plot an example. Each imputation
        # works on its own deep copy of the dataset.
        imputed_mean_dataset = MisVal.impute_mean(
            dataset=copy.deepcopy(dataset), col='hr_watch_rate')
        imputed_median_dataset = MisVal.impute_median(
            dataset=copy.deepcopy(dataset), col='hr_watch_rate')
        imputed_interpolation_dataset = MisVal.impute_interpolate(
            dataset=copy.deepcopy(dataset), col='hr_watch_rate')
        DataViz.plot_imputed_values(
            dataset, ['original', 'mean', 'median', 'interpolation'],
            'hr_watch_rate', imputed_mean_dataset['hr_watch_rate'],
            imputed_median_dataset['hr_watch_rate'],
            imputed_interpolation_dataset['hr_watch_rate'])

    elif FLAGS.mode == 'kalman':
        # Try the Kalman filter on the acc_phone_x attribute of the original
        # dataset and study the result
        try:
            original_dataset = pd.read_csv(DATA_PATH / ORIG_DATASET_FNAME,
                                           index_col=0)
            original_dataset.index = pd.to_datetime(original_dataset.index)
        except IOError:
            print(
                'File not found, try to run previous crowdsignals scripts first!'
            )
            raise
        KalFilter = KalmanFilters()
        kalman_dataset = KalFilter.apply_kalman_filter(
            data_table=original_dataset, col='acc_phone_x')
        DataViz.plot_imputed_values(kalman_dataset, ['original', 'kalman'],
                                    'acc_phone_x',
                                    kalman_dataset['acc_phone_x_kalman'])
        DataViz.plot_dataset(data_table=kalman_dataset,
                             columns=['acc_phone_x', 'acc_phone_x_kalman'],
                             match=['exact', 'exact'],
                             display=['line', 'line'])

    elif FLAGS.mode == 'lowpass':
        # Apply a lowpass filter and reduce the importance of the data above
        # the cutoff frequency given by FLAGS.cutoff
        # Determine the sampling frequency
        fs = float(1000) / milliseconds_per_instance

        # Study acc_phone_x
        new_dataset = LowPass.low_pass_filter(
            data_table=copy.deepcopy(dataset),
            col='acc_phone_x',
            sampling_frequency=fs,
            cutoff_frequency=FLAGS.cutoff,
            order=10)
        # Plot only the rows between 40% and 43% of the dataset
        DataViz.plot_dataset(
            new_dataset.iloc[int(0.4 * len(new_dataset.index)
                                 ):int(0.43 * len(new_dataset.index)), :],
            ['acc_phone_x', 'acc_phone_x_lowpass'], ['exact', 'exact'],
            ['line', 'line'])

    elif FLAGS.mode == 'PCA':
        # First impute again, as PCA can not deal with missing values
        for col in [c for c in dataset.columns if 'label' not in c]:
            dataset = MisVal.impute_interpolate(dataset, col)

        # Determine the PC's for all but the target columns (the labels and the heart rate)
        selected_predictor_cols = [
            c for c in dataset.columns
            if (not ('label' in c)) and (not (c == 'hr_watch_rate'))
        ]
        pc_values = PCA.determine_pc_explained_variance(
            data_table=dataset, cols=selected_predictor_cols)
        cumulated_variance = np.cumsum(pc_values)

        # Plot the explained variance and cumulated variance
        comp_numbers = np.arange(1, len(pc_values) + 1)
        DataViz.plot_xy(x=[comp_numbers, comp_numbers],
                        y=[pc_values, cumulated_variance],
                        xlabel='principal component number',
                        ylabel='explained variance',
                        ylim=[0, 1],
                        line_styles=['b-', 'r-'],
                        names=['Variance', 'Cumulated variance'])

        # Select 7 as the best number of PC's as this explains most of the variance
        n_pcs = 7
        dataset = PCA.apply_pca(data_table=copy.deepcopy(dataset),
                                cols=selected_predictor_cols,
                                number_comp=n_pcs)

        # Visualize the result of the PC's and the overall final dataset
        DataViz.plot_dataset(dataset, ['pca_', 'label'], ['like', 'like'],
                             ['line', 'points'])
        # NOTE(review): 8 column patterns are passed with 9 match and 9
        # display entries; the trailing entry looks unused -- confirm against
        # VisualizeDataset.plot_dataset before trimming.
        DataViz.plot_dataset(dataset, [
            'acc_', 'gyr_', 'hr_watch_rate', 'light_phone_lux', 'mag_',
            'press_phone_', 'pca_', 'label'
        ], [
            'like', 'like', 'like', 'like', 'like', 'like', 'like', 'like',
            'like'
        ], [
            'line', 'line', 'line', 'line', 'line', 'line', 'line', 'points',
            'points'
        ])

    elif FLAGS.mode == 'final':
        # Carry out that operation over all columns except for the label
        print('Imputing missing values.')
        for col in tqdm([c for c in dataset.columns if 'label' not in c]):
            dataset = MisVal.impute_interpolate(dataset=dataset, col=col)

        # Include all measurements that have a form of periodicity and filter them
        periodic_measurements = [
            'acc_phone_x', 'acc_phone_y', 'acc_phone_z', 'acc_watch_x',
            'acc_watch_y', 'acc_watch_z', 'gyr_phone_x', 'gyr_phone_y',
            'gyr_phone_z', 'gyr_watch_x', 'gyr_watch_y', 'gyr_watch_z',
            'mag_phone_x', 'mag_phone_y', 'mag_phone_z', 'mag_watch_x',
            'mag_watch_y', 'mag_watch_z'
        ]

        print('Applying low pass filter on peridic measurements.')
        # Determine the sampling frequency.
        fs = float(1000) / milliseconds_per_instance
        for col in tqdm(periodic_measurements):
            dataset = LowPass.low_pass_filter(data_table=dataset,
                                              col=col,
                                              sampling_frequency=fs,
                                              cutoff_frequency=FLAGS.cutoff,
                                              order=10)
            # Overwrite the raw column with its filtered version and drop
            # the temporary '<col>_lowpass' column.
            dataset[col] = dataset[col + '_lowpass']
            del dataset[col + '_lowpass']

        # Use the optimal found parameter n_pcs = 7 to apply PCA to the final dataset
        selected_predictor_cols = [
            c for c in dataset.columns
            if (not ('label' in c)) and (not (c == 'hr_watch_rate'))
        ]
        n_pcs = 7
        dataset = PCA.apply_pca(data_table=copy.deepcopy(dataset),
                                cols=selected_predictor_cols,
                                number_comp=n_pcs)

        # Visualize the final overall dataset
        # NOTE(review): 8 column patterns are passed with 9 match and 9
        # display entries; the trailing entry looks unused -- confirm against
        # VisualizeDataset.plot_dataset before trimming.
        DataViz.plot_dataset(dataset, [
            'acc_', 'gyr_', 'hr_watch_rate', 'light_phone_lux', 'mag_',
            'press_phone_', 'pca_', 'label'
        ], [
            'like', 'like', 'like', 'like', 'like', 'like', 'like', 'like',
            'like'
        ], [
            'line', 'line', 'line', 'line', 'line', 'line', 'line', 'points',
            'points'
        ])

        # Store the final outcome
        dataset.to_csv(DATA_PATH / RESULT_FNAME)
Esempio n. 6
0


        dataSet.data_table = dataSet.data_table[(length - 53): (length - 1)]  # same length for every sample

        FreqAbs = FourierTransformation()
        transformations = []
        number_frequencies = 50
        for column in list(dataSet.data_table.columns):
            transformation = np.abs(np.fft.fft(dataSet.data_table[column], number_frequencies))
            transformations.append((column,transformation))

        cutoff_frequency = 20
        sampling_frequency = 50
        order = 3
        LowPass = LowPassFilter()

        if len(dataSet.data_table[ 'AccelerometerX (m/s^2)']) < 50:
            print path + "\\" + label + "\\" + sample

        new_dataset = LowPass.low_pass_filter(dataSet.data_table, 'AccelerometerX (m/s^2)', sampling_frequency,
                                              cutoff_frequency,
                                              order=order, phase_shift=True)
        new_dataset = LowPass.low_pass_filter(new_dataset, 'AccelerometerY (m/s^2)', sampling_frequency,
                                              cutoff_frequency, order=order,
                                              phase_shift=True)
        new_dataset = LowPass.low_pass_filter(new_dataset, 'AccelerometerZ (m/s^2)', sampling_frequency,
                                              cutoff_frequency, order=order,
                                              phase_shift=True)
        new_dataset = LowPass.low_pass_filter(new_dataset, 'GyroscopeX (rad/s)', sampling_frequency, cutoff_frequency,
                                              order=order,