def feature_processing(self, filename, trip=1, workers=8):
    """Read one recording and build its averaged feature table.

    The work runs as two parallel stages:

    1. The records are loaded with ``dm.trip_data_to_df``, cut into
       segments by ``self.split_segments`` and converted to features by
       ``self.features``.
    2. The resulting features are re-segmented with
       ``time_intervals=self.AVG_FEATURES_INTERVALS`` and reduced by
       ``self.average_features``.

    After each stage the per-chunk results are concatenated and ordered
    by their ``'Time'`` column.

    Args:
        filename: Data file name, forwarded to ``dm.trip_data_to_df``.
        trip: Trip identifier, forwarded to ``dm.trip_data_to_df``.
        workers: Number of worker processes per stage; the segments are
            split into at most this many chunks.

    Returns:
        The concatenated output of ``self.average_features``, sorted by
        ``'Time'``.

    Raises:
        ValueError: If a stage has no segments to process, or
            ``workers`` is negative.
        ZeroDivisionError: If ``workers`` is 0.
    """
    accelerometer_data = dm.trip_data_to_df(filename, trip=trip)
    raw_segments = self.split_segments(accelerometer_data)
    segment_features = self._map_in_chunks(
        self.features, raw_segments, workers)
    segments_df = pd.concat(segment_features, ignore_index=True)
    segments_df = segments_df.sort_values('Time')

    # Average the features over AVG_FEATURES_INTERVALS (noted as 1 minute
    # intervals by the original author).
    feature_segments = self.split_segments(
        segments_df, time_intervals=self.AVG_FEATURES_INTERVALS)
    average_features = self._map_in_chunks(
        self.average_features, feature_segments, workers)
    avg_features_df = pd.concat(average_features, ignore_index=True)
    return avg_features_df.sort_values('Time')


def _map_in_chunks(self, func, segments, workers):
    """Apply ``func`` to contiguous chunks of ``segments`` in subprocesses.

    Args:
        func: Picklable callable that receives one chunk (a slice of
            ``segments``) and returns that chunk's result.
        segments: Sliceable sequence of work items.
        workers: Number of worker processes, and the maximum number of
            chunks produced.

    Returns:
        A list with one result per chunk, in chunk order.

    Raises:
        ValueError: If no chunk can be formed (empty ``segments`` or a
            negative ``workers``).
        ZeroDivisionError: If ``workers`` is 0.
    """
    chunk_size = ceil(len(segments) / workers)
    if chunk_size < 1:
        # A zero step would make range() fail with an opaque message;
        # raise the same exception type with a usable explanation.
        raise ValueError(
            f"cannot split {len(segments)} segment(s) "
            f"across {workers} worker(s)")
    chunks = [
        segments[start:start + chunk_size]
        for start in range(0, len(segments), chunk_size)
    ]
    with ProcessPoolExecutor(max_workers=workers) as executor:
        # map() yields results in submission order, matching chunk order.
        return list(executor.map(func, chunks))
def get_joined_trips_data(self, trips):
    """Build one joined sensor/classification table per trip.

    For every trip, features are computed with ``self.feature_processing``
    for the ``'Pixel_accelerometer'`` and ``f'Pixel_gyro_{trip}'`` files.
    Their ``'msm'`` and ``'variance'`` columns are suffixed with the
    cleaned file name, the two tables are concatenated column-wise and
    passed through ``self.remove_dup_columns``. The ``"Pixel_activity"``
    data of the same trip is segmented with
    ``self.AVG_FEATURES_INTERVALS``, averaged by
    ``self.average_classification`` and joined to the sensor table with
    ``self.join_sensors_classification_data``.

    Args:
        trips: Iterable of trip identifiers.

    Returns:
        A list holding the joined result of each trip, in the order the
        trips were given.
    """
    joined_per_trip = []
    for trip in trips:
        print(f"processing trip {trip}")

        sensor_frames = []
        for source_name in ('Pixel_accelerometer', f'Pixel_gyro_{trip}'):
            features = self.feature_processing(source_name, trip=trip)
            suffix = self.clean_file_name(source_name)
            renamed_columns = {
                'msm': f'msm_{suffix}',
                'variance': f'variance_{suffix}',
            }
            features.rename(columns=renamed_columns, inplace=True)
            sensor_frames.append(features)

        side_by_side = pd.concat(sensor_frames, axis=1)
        sensors_df = self.remove_dup_columns(side_by_side)

        activity_df = dm.trip_data_to_df("Pixel_activity", trip=trip)
        # Average the classification over the same intervals as the
        # sensor features (noted as 1 minute by the original author).
        activity_segments = self.split_segments(
            activity_df, time_intervals=self.AVG_FEATURES_INTERVALS)
        classification_df = self.average_classification(activity_segments)

        joined_per_trip.append(
            self.join_sensors_classification_data(
                sensors_df, classification_df))
    return joined_per_trip