def main():
    """Add frequency-domain features to the chapter 3 dataset and plot them.

    Reads ``chapter3_result_final.csv`` from ``./intermediate_datafiles/``,
    runs the Fourier abstraction once on ``acc_phone_x`` alone (for the
    spectral-analysis plot) and once on all periodic predictor columns,
    thins the rows to limit the overlap between windows and plots the result.

    Raises:
        IOError: if the input CSV cannot be read (re-raised after printing a
            hint).
    """
    DataViz = VisualizeDataset()

    dataset_path = './intermediate_datafiles/'
    try:
        dataset = pd.read_csv(dataset_path + 'chapter3_result_final.csv',
                              index_col=0)
    except IOError:
        print('File not found, try to run previous crowdsignals scripts first!')
        raise

    # Index.to_datetime() was deprecated and later removed from pandas;
    # pd.to_datetime() is the supported way to convert the index.
    dataset.index = pd.to_datetime(dataset.index)

    # total_seconds() covers the whole interval. The .microseconds attribute
    # only holds the sub-second component, so it would be 0 for sampling
    # intervals of whole seconds and break the divisions below.
    milliseconds_per_instance = (
        dataset.index[1] - dataset.index[0]).total_seconds() * 1000

    # Now we move to the frequency domain, with the same window size.
    FreqAbs = FourierTransformation()
    fs = float(1000) / milliseconds_per_instance

    # Number of instances in the 10 second Fourier window.
    # NOTE(review): the original code used `ws` for the overlap computation
    # below without ever defining it (NameError). It is assumed here to be
    # the Fourier window used in this function - confirm the intended size.
    ws = int(float(10000) / milliseconds_per_instance)

    periodic_predictor_cols = [
        'acc_phone_x', 'acc_phone_y', 'acc_phone_z',
        'acc_watch_x', 'acc_watch_y', 'acc_watch_z',
        'gyr_phone_x', 'gyr_phone_y', 'gyr_phone_z',
        'gyr_watch_x', 'gyr_watch_y', 'gyr_watch_z',
        'mag_phone_x', 'mag_phone_y', 'mag_phone_z',
        'mag_watch_x', 'mag_watch_y', 'mag_watch_z'
    ]

    # Work on a copy so the single-column demo does not add columns to the
    # dataset that is processed in full afterwards.
    data_table = FreqAbs.abstract_frequency(
        copy.deepcopy(dataset), ['acc_phone_x'], ws, fs)

    # Spectral analysis.
    DataViz.plot_dataset(
        data_table,
        ['acc_phone_x_max_freq', 'acc_phone_x_freq_weighted',
         'acc_phone_x_pse', 'label'],
        ['like', 'like', 'like', 'like'],
        ['line', 'line', 'line', 'points'])

    dataset = FreqAbs.abstract_frequency(
        dataset, periodic_predictor_cols, ws, fs)

    # Now we only take a certain percentage of overlap in the windows,
    # otherwise our training examples will be too much alike.

    # The percentage of overlap we allow
    window_overlap = 0.9
    # A step of 0 is rejected by iloc, so never go below 1 (keep every row).
    skip_points = max(1, int((1 - window_overlap) * ws))
    dataset = dataset.iloc[::skip_points, :]

    DataViz.plot_dataset(
        dataset,
        ['acc_phone_x', 'gyr_phone_x', 'hr_watch_rate', 'light_phone_lux',
         'mag_phone_x', 'press_phone_', 'pca_1', 'label'],
        ['like', 'like', 'like', 'like', 'like', 'like', 'like', 'like'],
        ['line', 'line', 'line', 'line', 'line', 'line', 'line', 'points'])
# Show which columns are available before plotting.
print(dataset.columns)

# Plot the three acceleration axes together with the labels.
DataViz.plot_dataset(
    dataset,
    ['acc_x', 'acc_y', 'acc_z', 'label'],
    ['exact', 'like', 'like', 'like'],
    ['line', 'line', 'line', 'points'])

# Disabled: time-domain abstraction (mean / std over a 30 second window).
# ws = int(float(0.5*60000)/milliseconds_per_instance)
# dataset = NumAbs.abstract_numerical(dataset, periodic_predictor_cols, ws, 'mean')
# dataset = NumAbs.abstract_numerical(dataset, periodic_predictor_cols, ws, 'std')

print('temporal', dataset.shape)
print('attributes frequency domain')

# Frequency domain: sampling rate in Hz derived from the instance duration.
fs = 1000.0 / milliseconds_per_instance
FreqAbs = FourierTransformation()
print('frequency', dataset.shape)

# Spectral analysis over a window of 10 seconds worth of instances.
dataset = FreqAbs.abstract_frequency(
    dataset,
    periodic_predictor_cols,
    int(10000.0 / milliseconds_per_instance),
    fs)
print('frequency all col', dataset.shape)

# For every column, print the per-column counts over the rows in which that
# column is missing.
for col in dataset.columns:
    missing_rows = dataset[dataset[col].isna()]
    print(col, missing_rows.count())

# Next step (disabled): limit the overlap between windows, otherwise the
# training examples will be too much alike.
# pickle.dump(dataset, open('concat_no_skipping.pkl', 'wb'))
DataViz.plot_dataset( dataset, [ 'gravity.x', 'gravity.y', 'gravity.z', 'userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z', 'label' ], ['like', 'like', 'like', 'like', 'like', 'like', 'like'], ['line', 'line', 'line', 'line', 'line', 'line', 'points']) CatAbs = CategoricalAbstraction() dataset = CatAbs.abstract_categorical( dataset, ['label'], ['like'], 0.03, int(float(5 * 60000) / milliseconds_per_instance), 2) # Now we move to the frequency domain, with the same window size. FreqAbs = FourierTransformation() fs = float(1000) / milliseconds_per_instance periodic_predictor_cols = [ 'gravity.x', 'gravity.y', 'gravity.z', 'userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z' ] data_table = FreqAbs.abstract_frequency( copy.deepcopy(dataset), ['userAcceleration.x'], int(float(10000) / milliseconds_per_instance), fs) # Spectral analysis. DataViz.plot_dataset(data_table, [ 'userAcceleration.x_max_freq', 'userAcceleration.x_freq_weighted', 'userAcceleration.x_pse', 'label'
# Create time points: 0 to 16 (inclusive) in steps of one sampling period.
df = pd.DataFrame(np.arange(0, 16.1, float(1) / fs), columns=list('X'))

# Example signal: a 0.2 Hz sine plus a shifted 0.25 Hz sine with an offset
# of 5.
c1 = 3 * np.sin(2 * math.pi * 0.2 * df['X'])
c2 = 2 * np.sin(2 * math.pi * 0.25 * (df['X'] - 2)) + 5
df['Y'] = c1 + c2

# pyplot.hold() was removed in Matplotlib 3.0; successive plot calls draw on
# the same axes by default, so no replacement call is needed.
plot.plot(df['X'], df['Y'], 'b-')
plot.legend(['$example$ $measurement$ $sequence$'], loc=3, fontsize='small')
plot.xlabel('time')
plot.ylabel('$X_{1}$')
plot.show()

# Figure 4.2
FreqAbs = FourierTransformation()
data_table = FreqAbs.abstract_frequency(copy.deepcopy(df), ['Y'], 160, fs)

# Get the frequencies from the columns....
frequencies = []
values = []
for col in data_table.columns:
    # Capture the number directly. The previous slice used len(val), the
    # number of matches, as if it were the length of the matched text, so it
    # only parsed correctly when there was exactly one match.
    val = re.findall(r'freq_(\d+\.\d+)_Hz', col)
    if len(val) > 0:
        frequency = float(val[0])
        frequencies.append(frequency)
        # DataFrame.ix was removed in pandas 1.0; plain column selection
        # returns the same full column.
        values.append(data_table[col])

fig = plot.figure()
ax1 = fig.add_subplot(111)
plot.xlim([0, 5])
# Add the windowed standard deviation of every selected predictor column.
dataset = NumAbs.abstract_numerical(
    dataset, selected_predictor_cols, ws, 'std')

# Disabled: plot of the time-domain features.
# DataViz.plot_dataset(dataset,
#                      ['acc_phone_X', 'gyr_phone_X', 'mag_phone_X', 'pca_1', 'label'],
#                      ['like', 'like', 'like', 'like', 'like'],
#                      ['line', 'line', 'line', 'line', 'points'])
#
# Disabled: support for labels is useless in our case.
# CatAbs = CategoricalAbstraction()
# dataset = CatAbs.abstract_categorical(dataset, ['label'], ['like'], 0.03,
#                                       int(float(8000) / milliseconds_per_instance), 2)

# Now we move to the frequency domain, with the same window size.
periodic_predictor_cols = [
    'acc_phone_X', 'acc_phone_Y', 'acc_phone_Z',
    'gyr_phone_X', 'gyr_phone_Y', 'gyr_phone_Z',
    'mag_phone_X', 'mag_phone_Y', 'mag_phone_Z'
]
# Sampling rate in Hz derived from the instance duration.  Todo change?
fs = 1000.0 / milliseconds_per_instance
FreqAbs = FourierTransformation()

# Disabled: single-column spectral analysis (we use 4s).
# data_table = FreqAbs.abstract_frequency(copy.deepcopy(dataset), ['acc_phone_Y'],
#                                         int(float(4000) / milliseconds_per_instance), fs)
# DataViz.plot_dataset(data_table, ['acc_phone_Y_max_freq', 'acc_phone_Y_freq_weighted', 'acc_phone_Y_pse', 'label'],
#                      ['like', 'like', 'like', 'like'], ['line', 'line', 'line', 'points'])
# Locations of the input and output files of this chapter.
DATA_PATH = Path('./intermediate_datafiles/')
DATASET_FNAME = 'chapter3_result_final.csv'
RESULT_FNAME = 'chapter4_result.csv'
categorical_abstraction_result_file = 'chapter4_categorical_result.csv'

# Selects which part of the script below is executed.
task = 'frequency_plot'

dataset = pd.read_csv(DATA_PATH / DATASET_FNAME, index_col=0)
# Convert the index in one vectorised call. Going through a dict keyed by
# the index values drops duplicate timestamps, which leaves a new index that
# is shorter than the data.
dataset.index = pd.to_datetime(dataset.index)

DataViz = VisualizeDataset(__file__, show=False)

# total_seconds() covers the whole interval. The .microseconds attribute only
# holds the sub-second component, so it would be 0 for sampling intervals of
# whole seconds and break the divisions below.
milliseconds_per_instance = (
    dataset.index[1] - dataset.index[0]).total_seconds() * 1000

if task == 'frequency_plot':
    fs = float(1000) / milliseconds_per_instance
    FreqAbs = FourierTransformation()

    # Only use the rows where the 'labelOnTable' column equals 1.
    data_table = dataset[dataset['labelOnTable'] == 1]
    data_table = FreqAbs.abstract_frequency(
        copy.deepcopy(data_table), ['acc_phone_x'],
        int(float(10000) / milliseconds_per_instance), fs)

    # Collect the frequency encoded in each 'freq_<value>_Hz' column name
    # together with the absolute values of that column.
    frequencies = []
    values = []
    for col in data_table.columns:
        # Capture the number directly. The previous slice used len(val), the
        # number of matches, as if it were the length of the matched text.
        val = re.findall(r'freq_(\d+\.\d+)_Hz', col)
        if len(val) > 0:
            frequency = float(val[0])
            frequencies.append(frequency)
            # Plain column selection avoids a label-based lookup of every
            # index value, which multiplies rows when timestamps repeat.
            values.append(abs(data_table[col]))  # absolute amp

    fig = plt.figure()
def main():
    """Run the chapter 4 feature abstraction selected by ``FLAGS.mode``.

    Loads the chapter 3 result from ``DATA_PATH / DATASET_FNAME`` and then,
    depending on ``FLAGS.mode``:

    * ``'time'``: adds the windowed mean and std of ``acc_phone_x`` for
      windows of 5 seconds, 30 seconds and 5 minutes and plots them.
    * ``'frequency'``: adds frequency features of ``acc_phone_x`` over a
      10 second window and plots them.
    * ``'final'``: adds time-domain, categorical and frequency features,
      thins the rows according to ``FLAGS.overlap``, plots the result and
      writes it to ``DATA_PATH / RESULT_FNAME``.

    Any other mode value falls through without further processing.

    Raises:
        IOError: if the chapter 3 result file cannot be read (re-raised after
            printing a hint).
    """
    # Read the result from the previous chapter
    try:
        dataset = pd.read_csv(DATA_PATH / DATASET_FNAME, index_col=0)
    except IOError:
        print('File not found, try to run previous crowdsignals scripts first!')
        raise
    # Convert the index to datetime
    dataset.index = pd.to_datetime(dataset.index)

    # Create an instance of visualization class to plot the results
    DataViz = VisualizeDataset(__file__)

    # Compute the number of milliseconds covered by an instance based on the
    # first two rows. total_seconds() covers the whole interval; the
    # .microseconds attribute only holds the sub-second component and would
    # be 0 for sampling intervals of whole seconds.
    milliseconds_per_instance = (
        dataset.index[1] - dataset.index[0]).total_seconds() * 1000

    # Create objects for feature abstraction
    NumAbs = NumericalAbstraction()
    CatAbs = CategoricalAbstraction()
    FreqAbs = FourierTransformation()

    if FLAGS.mode == 'time':
        # Focus on the time domain first

        # Set the window sizes to the number of instances representing
        # 5 seconds, 30 seconds and 5 minutes
        window_sizes = [
            int(float(5000) / milliseconds_per_instance),
            int(float(0.5 * 60000) / milliseconds_per_instance),
            int(float(5 * 60000) / milliseconds_per_instance)
        ]

        # Work on a copy so the loaded dataset itself stays untouched
        dataset_copy = copy.deepcopy(dataset)
        for ws in window_sizes:
            print(
                f'Abstracting numerical features for window size {ws * milliseconds_per_instance / 1000}s.'
            )
            dataset_copy = NumAbs.abstract_numerical(
                data_table=dataset_copy,
                cols=['acc_phone_x'],
                window_size=ws,
                aggregation_function='mean')
            dataset_copy = NumAbs.abstract_numerical(
                data_table=dataset_copy,
                cols=['acc_phone_x'],
                window_size=ws,
                aggregation_function='std')

        DataViz.plot_dataset(
            data_table=dataset_copy,
            columns=[
                'acc_phone_x', 'acc_phone_x_temp_mean',
                'acc_phone_x_temp_std', 'label'
            ],
            match=['exact', 'like', 'like', 'like'],
            display=['line', 'line', 'line', 'points'])

    elif FLAGS.mode == 'frequency':
        # Move to the frequency domain with the same window size
        fs = 1000.0 / milliseconds_per_instance
        ws = int(10000.0 / milliseconds_per_instance)
        data_table = FreqAbs.abstract_frequency(
            data_table=copy.deepcopy(dataset),
            cols=['acc_phone_x'],
            window_size=ws,
            sampling_rate=fs)

        # Spectral analysis
        DataViz.plot_dataset(
            data_table=data_table,
            columns=[
                'acc_phone_x_max_freq', 'acc_phone_x_freq_weighted',
                'acc_phone_x_pse', 'label'
            ],
            match=['like', 'like', 'like', 'like'],
            display=['line', 'line', 'line', 'points'])

    elif FLAGS.mode == 'final':
        ws = int(float(0.5 * 60000) / milliseconds_per_instance)
        fs = 1000.0 / milliseconds_per_instance

        # Abstract time domain features and plot the result
        selected_predictor_cols = [
            c for c in dataset.columns if 'label' not in c
        ]
        print('Calculating mean and std for selected predictor cols.')
        dataset = NumAbs.abstract_numerical(
            data_table=dataset,
            cols=selected_predictor_cols,
            window_size=ws,
            aggregation_function='mean')
        dataset = NumAbs.abstract_numerical(
            data_table=dataset,
            cols=selected_predictor_cols,
            window_size=ws,
            aggregation_function='std')

        DataViz.plot_dataset(
            data_table=dataset,
            columns=[
                'acc_phone_x', 'gyr_phone_x', 'hr_watch_rate',
                'light_phone_lux', 'mag_phone_x', 'press_phone_', 'pca_1',
                'label'
            ],
            match=[
                'like', 'like', 'like', 'like', 'like', 'like', 'like', 'like'
            ],
            display=[
                'line', 'line', 'line', 'line', 'line', 'line', 'line',
                'points'
            ])

        # Abstract categorical features
        print('Abstracting categorical features.')
        dataset = CatAbs.abstract_categorical(
            data_table=dataset,
            cols=['label'],
            match=['like'],
            min_support=0.03,
            window_size=int(float(5 * 60000) / milliseconds_per_instance),
            max_pattern_size=2)

        # Abstract frequency domain features
        periodic_predictor_cols = [
            'acc_phone_x', 'acc_phone_y', 'acc_phone_z',
            'acc_watch_x', 'acc_watch_y', 'acc_watch_z',
            'gyr_phone_x', 'gyr_phone_y', 'gyr_phone_z',
            'gyr_watch_x', 'gyr_watch_y', 'gyr_watch_z',
            'mag_phone_x', 'mag_phone_y', 'mag_phone_z',
            'mag_watch_x', 'mag_watch_y', 'mag_watch_z'
        ]
        print('Abstracting frequency features.')
        dataset = FreqAbs.abstract_frequency(
            data_table=dataset,
            cols=periodic_predictor_cols,
            window_size=ws,
            sampling_rate=fs)

        # Take a certain percentage of overlap in the windows, otherwise
        # training examples will be too much alike

        # Set the allowed percentage of overlap
        window_overlap = FLAGS.overlap
        # An overlap close to 1 (or a very small window) would give a step of
        # 0, which iloc rejects; fall back to keeping every row.
        skip_points = max(1, int((1 - window_overlap) * ws))
        dataset = dataset.iloc[::skip_points, :]

        # Plot the final dataset
        DataViz.plot_dataset(
            data_table=dataset,
            columns=[
                'acc_phone_x', 'gyr_phone_x', 'hr_watch_rate',
                'light_phone_lux', 'mag_phone_x', 'press_phone_', 'pca_1',
                'label'
            ],
            match=[
                'like', 'like', 'like', 'like', 'like', 'like', 'like', 'like'
            ],
            display=[
                'line', 'line', 'line', 'line', 'line', 'line', 'line',
                'points'
            ])

        # Store the generated dataset
        dataset.to_csv(DATA_PATH / RESULT_FNAME)
ws = int(float(0.5 * 60000) / milliseconds_per_instance) selected_predictor_cols = [c for c in dataset_cs.columns if not 'label' in c] dataset_cs = NumAbs.abstract_numerical(dataset_cs, selected_predictor_cols, ws, 'mean') dataset_cs = NumAbs.abstract_numerical(dataset_cs, selected_predictor_cols, ws, 'std') CatAbs = CategoricalAbstraction() dataset_cs = CatAbs.abstract_categorical( dataset_cs, ['label'], ['like'], 0.03, int(float(5 * 60000) / milliseconds_per_instance), 2) # Now we move to the frequency domain, with the same window size. FreqAbs = FourierTransformation() fs = float(1000) / milliseconds_per_instance periodic_predictor_cols = [ 'acc_phone_x', 'acc_phone_y', 'acc_phone_z', 'acc_watch_x', 'acc_watch_y', 'acc_watch_z', 'gyr_phone_x', 'gyr_phone_y', 'gyr_phone_z', 'gyr_watch_x', 'gyr_watch_y', 'gyr_watch_z', 'mag_phone_x', 'mag_phone_y', 'mag_phone_z', 'mag_watch_x', 'mag_watch_y', 'mag_watch_z' ] data_table = FreqAbs.abstract_frequency( copy.deepcopy(dataset_cs), ['acc_phone_x'], int(float(10000) / milliseconds_per_instance), fs) # Spectral analysis. DataViz.plot_dataset(data_table, [
dataSet = CreateDataset(path + "\\" + label + "\\" + sample + "\\", milliseconds_per_instance) # for sensor in sensors: dataSet.add_numerical_dataset("Accelerometer.csv", 'Time (s)', ['X (m/s^2)', 'Y (m/s^2)', 'Z (m/s^2)'], 'avg', "Accelerometer") dataSet.add_numerical_dataset("Gyroscope.csv", 'Time (s)', ['X (rad/s)', 'Y (rad/s)', 'Z (rad/s)'], 'avg', "Gyroscope") dataSet.data_table = dataSet.data_table[~(np.isnan(dataSet.data_table['GyroscopeZ (rad/s)']))] # todo: useful? length = len(dataSet.data_table) dataSet.data_table = dataSet.data_table[(length - 53): (length - 1)] # same length for every sample FreqAbs = FourierTransformation() transformations = [] number_frequencies = 50 for column in list(dataSet.data_table.columns): transformation = np.abs(np.fft.fft(dataSet.data_table[column], number_frequencies)) transformations.append((column,transformation)) cutoff_frequency = 20 sampling_frequency = 50 order = 3 LowPass = LowPassFilter() if len(dataSet.data_table[ 'AccelerometerX (m/s^2)']) < 50: print path + "\\" + label + "\\" + sample new_dataset = LowPass.low_pass_filter(dataSet.data_table, 'AccelerometerX (m/s^2)', sampling_frequency,