def uci_hapt(self):
    """
    Sampling rate = 50 Hz

    1  WALKING
    2  WALKING_UPSTAIRS
    3  WALKING_DOWNSTAIRS
    4  SITTING
    5  STANDING
    6  LAYING
    7  STAND_TO_SIT
    8  SIT_TO_STAND
    9  SIT_TO_LIE
    10 LIE_TO_SIT
    11 STAND_TO_LIE
    12 LIE_TO_STAND
    """
    self.name = "UCI HAPT"
    subfolder = 'UCI/HAPT Data Set/RawData/'
    data_file = self.root_folder + subfolder + 'data.npz'

    if not self.simple_labels:
        activity_map = {1: 'WALKING', 2: 'WALKING_UPSTAIRS', 3: 'WALKING_DOWNSTAIRS',
                        4: 'SITTING', 5: 'STANDING', 6: 'LAYING', 7: 'STAND_TO_SIT',
                        8: 'SIT_TO_STAND', 9: 'SIT_TO_LIE', 10: 'LIE_TO_SIT',
                        11: 'STAND_TO_LIE', 12: 'LIE_TO_STAND'}
    else:
        activity_map = {1: 'WALKING', 2: 'STAIRS', 3: 'STAIRS', 4: 'INACTIVE',
                        5: 'INACTIVE', 6: 'INACTIVE', 7: 'TRANSITION', 8: 'TRANSITION',
                        9: 'TRANSITION', 10: 'TRANSITION', 11: 'TRANSITION', 12: 'TRANSITION'}

    # Load the raw accelerometer files and the label index
    files = sorted(glob.glob(self.root_folder + subfolder + 'acc_*'))
    labels = pd.read_csv(self.root_folder + subfolder + 'labels.txt',
                         names=['exp', 'user', 'activity', 'start', 'end'],
                         header=None, sep=' ')

    # Extract the signals and split them into segments. UCI HAR v1 uses a 128-sample
    # window with a step size of 64.
    data_array = np.empty((0, self.n_samples, 3))
    y = np.empty((0,))
    users = np.empty((0,))

    for exp, user in labels[['exp', 'user']].drop_duplicates().values:
        file_name = self.root_folder + subfolder + 'acc_exp%02d_user%02d.txt' % (exp, user)
        print("Loading %s" % file_name)
        df = pd.read_csv(file_name, sep=' ', header=None)
        idx = ((labels['exp'] == exp) & (labels['user'] == user))

        # Annotate every sample with its activity (0 where no label applies)
        values = df.values
        activities = np.zeros((values.shape[0], 1))
        for activity, start, end in labels[['activity', 'start', 'end']][idx].values:
            activities[start:end] = activity
        values = np.concatenate((values, activities), axis=1)

        # Segment into windows with overlap
        segmented = rolling_window(values, (self.n_samples, 0), self.step).swapaxes(1, 2)

        # Label each window by the activity of its centre sample
        t_idx = int(self.n_samples / 2)
        t = np.asarray(segmented[:, t_idx, -1]).astype('int')

        # Remove samples without label (currently disabled, so activity 0 is kept)
        # idx = t != 0
        # segmented = segmented[idx]
        # t = t[idx]

        # Collect data
        y = np.concatenate((y, t))
        data_array = np.concatenate((data_array, segmented[:, :, :-1]))
        users = np.concatenate((users, [user] * len(t)))

    if self.expand:
        y = expand_target(y, data_array.shape[1])

    # Add features to data
    data_array, stats = self.add_features(data_array,
                                          normalise=self.normalize,
                                          add_roll=self.add_roll,
                                          add_pitch=self.add_pitch,
                                          add_filter=self.add_filter,
                                          comp_magnitude=self.comp_magnitude)

    # Convert to common labels
    if self.common_labels:
        y = self.map_to_common_activities(y, activity_map)
    # else: keep the original 1-12 activity ids (no shift to zero-based labels)

    return data_array, y.astype('int'), self.name, users, stats
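
# All loaders in this file depend on a rolling_window(a, window, step) helper that is defined
# elsewhere. The function below is a hypothetical reference sketch of the semantics those calls
# appear to assume (segment a 2-D array along its first axis into overlapping windows of length
# window[0] with stride `step`, returning (n_windows, n_cols, window_len) so that the callers'
# .swapaxes(1, 2) yields (n_windows, window_len, n_cols)); the real helper may differ.
def _rolling_window_sketch(a, window, step):
    win_len = window[0] if isinstance(window, (tuple, list)) else window
    n_windows = (a.shape[0] - win_len) // step + 1
    out = np.stack([a[i * step:i * step + win_len] for i in range(n_windows)])
    return out.swapaxes(1, 2)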
def idash(self):
    """
    This dataset contains motion sensor data for 16 physical activities (walking, jogging,
    stair climbing, etc.) collected from 16 adults using an iPod touch device (Apple Inc.).
    The sampling rate was 30 Hz. The collection time per activity varied from 20 seconds
    to 17 minutes.
    """
    self.name = "IDASH"
    sub_folder = 'Physical Activity Sensor Data-Public/Public/iDASH_activity_dataset/'
    sr = 30.
    labels = ['400m_brisk_walk_pocket', '400m_jog_pocket', '400m_normal_walk_pocket',
              '400m_slow_walk_pocket', 'Sit_and_walk_test_pocket', 'Stair_ascend_brisk_pocket',
              'Stair_ascend_normal_pocket', 'Stair_descend_brisk_pocket',
              'Stair_descend_normal_pocket', 'Step_test_brisk_pocket', 'Step_test_normal_pocket',
              'Treadmill_jog_5.5mph_pocket', 'Treadmill_walk_1.5mph_pocket',
              'Treadmill_walk_3.0mph_pocket', 'Treadmill_walk_4.0mph_pocket', 'Walking_pocket']
    activity_map = {0: 'WALKING', 1: 'JOGGING', 2: 'WALKING', 3: 'WALKING', 4: 'WALKING',
                    5: 'STAIRS', 6: 'STAIRS', 7: 'STAIRS', 8: 'STAIRS', 9: 'STEP', 10: 'STEP',
                    11: 'JOGGING', 12: 'WALKING', 13: 'WALKING', 14: 'WALKING', 15: 'WALKING'}

    # Load data. Window length and step are scaled to the native 30 Hz rate; the windows are
    # resampled to n_samples (the common-rate window length) further down.
    subjects = list(range(1, 17))
    cols = [0, 1, 2]
    win_len = int(self.n_samples * sr / SR)
    win_step = int(self.step * sr / SR)
    tmp_seg = np.empty((0, win_len, len(cols)))
    y = []
    users = []
    for subject in subjects:
        files = sorted(glob.glob(self.root_folder + sub_folder + '%d/*' % subject))
        for idx, csv_file in enumerate(files):
            if "blank" not in csv_file:
                tmp = pd.read_csv(csv_file, sep=',', usecols=cols).values
                tmp = rolling_window(tmp, window=(win_len, 0), step=win_step).swapaxes(1, 2)
                tmp_seg = np.vstack((tmp_seg, tmp))
                y.append(np.ones((tmp.shape[0],), dtype=int) * idx)
                users.append(np.ones((tmp.shape[0],), dtype=int) * subject)

    users = np.asarray(list(itertools.chain.from_iterable(users)))
    y = list(itertools.chain.from_iterable(y))

    # Resample each 30 Hz window to n_samples so all datasets share a common window length
    n_windows, sequence_length, n_features = tmp_seg.shape
    data_array = np.empty((n_windows, self.n_samples, n_features))
    for idx in range(n_windows):
        for f in range(n_features):
            data_array[idx, :, f] = resample(tmp_seg[idx, :, f], self.n_samples)

    data_array, stats = self.add_features(data_array,
                                          normalise=self.normalize,
                                          add_roll=self.add_roll,
                                          add_pitch=self.add_pitch,
                                          add_filter=self.add_filter,
                                          comp_magnitude=self.comp_magnitude)
    y = self.map_to_common_activities(y, activity_map)
    return data_array, y.astype('int'), self.name, users, stats
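
# Worked example of the rate conversion used in idash() above. It assumes the module-level
# `resample` is scipy.signal.resample and that the common rate SR is 50 Hz (both are
# assumptions of this sketch, not guaranteed by the code above): a window recorded at 30 Hz
# is cut at the 30 Hz-equivalent length and then stretched back to n_samples points.
def _rate_conversion_example(n_samples=200, sr=30., common_sr=50.):
    win_len = int(n_samples * sr / common_sr)  # 200 samples at 50 Hz ~ 120 samples at 30 Hz
    window_30hz = np.random.randn(win_len)     # one axis of one window at the native rate
    return resample(window_30hz, n_samples)    # stretched back to a 200-sample window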
def wisdm2(self):
    """
    Sampling rate: 20 Hz
    Activities: walking, jogging, sitting, standing, upstairs, downstairs
    :return: tuple of data
    """
    self.name = "WISDM2"
    sub_folder = 'WISDM/Real/'
    filename = 'WISDM_at_v2.0_raw.txt'
    columns = ['user', 'labels', 'timestamp', 'x', 'y', 'z']
    dtypes = {'user': np.int32, 'labels': str, 'timestamp': np.float64,
              'x': np.float32, 'y': np.float32, 'z': np.float32}
    df = pd.read_csv(self.root_folder + sub_folder + filename, names=columns,
                     lineterminator=';', dtype=dtypes)
    df = df.dropna()

    activity_map = {'Walking': 'WALKING', 'Upstairs': 'STAIRS', 'Stairs': 'STAIRS',
                    'Downstairs': 'STAIRS', 'Sitting': 'INACTIVE', 'Standing': 'INACTIVE',
                    'LyingDown': 'INACTIVE', 'Jogging': 'JOGGING'}
    df['labels'] = df['labels'].apply(activity_map.get)
    df['labels'] = df['labels'].apply(MAP_ACTIVITY.get)

    sr = 20.
    tmp = np.empty((0, 5))
    for user in df['user'].unique():
        tmp = np.concatenate((tmp, df[['x', 'y', 'z', 'labels', 'user']][df['user'] == user].values),
                             axis=0)

    # Window the 20 Hz samples into the equivalent of n_samples at 50 Hz
    tmp = rolling_window(tmp, window=(int(self.n_samples * sr / SR), 0),
                         step=int(self.step * sr / SR)).swapaxes(1, 2)

    n_windows, sequence_length, n_features = tmp.shape
    y = np.zeros(n_windows)
    users = np.zeros(n_windows)
    data_array = np.empty((n_windows, self.n_samples, 3))
    for idx in range(n_windows):
        # Majority vote over the window for both the label and the user id
        y[idx] = np.argmax(np.bincount(tmp[idx, :, -2].astype('int')))
        users[idx] = np.argmax(np.bincount(tmp[idx, :, -1].astype('int')))
        # Resample each axis from 20 Hz to the common window length
        for f in range(3):
            data_array[idx, :, f] = resample(tmp[idx, :, f], self.n_samples)

    data_array, stats = self.add_features(data_array,
                                          normalise=self.normalize,
                                          ratio=0,
                                          add_roll=self.add_roll,
                                          add_pitch=self.add_pitch,
                                          add_filter=self.add_filter,
                                          comp_magnitude=self.comp_magnitude)
    return data_array, y.astype('int'), self.name, users, stats
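
# Small worked example of the majority-vote labelling used in wisdm2() and uci_mhealth():
# each window carries one label per sample, and np.bincount followed by np.argmax picks the
# most frequent one as the window label.
def _majority_label_example():
    window_labels = np.array([2, 2, 3, 2, 2, 1, 2, 2])  # per-sample labels within one window
    return int(np.argmax(np.bincount(window_labels)))   # -> 2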
def uci_mhealth(self):
    """
    #Activities: 12
    #Sensor devices: 3 (chest, left ankle, right arm)
    #Subjects: 10

    L1: Standing still (1 min)
    L2: Sitting and relaxing (1 min)
    L3: Lying down (1 min)
    L4: Walking (1 min)
    L5: Climbing stairs (1 min)
    L6: Waist bends forward (20x)
    L7: Frontal elevation of arms (20x)
    L8: Knees bending (crouching) (20x)
    L9: Cycling (1 min)
    L10: Jogging (1 min)
    L11: Running (1 min)
    L12: Jump front & back (20x)

    :return: data
    """
    self.name = "UCI mHealth"
    sub_folder = 'UCI/mHealth/'
    sr = 50
    activity_map = {0: 'NULL', 1: 'INACTIVE', 2: 'INACTIVE', 3: 'INACTIVE', 4: 'WALKING',
                    5: 'STAIRS', 6: 'BEND_FORWARD', 7: 'ARM_ELEVATION', 8: 'KNEE_BEND',
                    9: 'CYCLING', 10: 'JOGGING', 11: 'RUNNING', 12: 'JUMP'}

    # Load the first subject and then the rest iteratively
    data = pd.read_csv(self.root_folder + sub_folder + 'mHealth_subject%d.log' % 1, sep='\t',
                       usecols=[0, 1, 2, 23], names=['x', 'y', 'z', 'labels'])
    data_array = rolling_window(data.values, window=(self.n_samples, 0), step=self.step).swapaxes(1, 2)
    users = np.ones(data_array.shape[0])
    for idx in range(2, 11):
        tmp = pd.read_csv(self.root_folder + sub_folder + 'mHealth_subject%d.log' % idx, sep='\t',
                          usecols=[0, 1, 2, 23], names=['x', 'y', 'z', 'labels']).values
        tmp = rolling_window(tmp, window=(self.n_samples, 0), step=self.step).swapaxes(1, 2)
        data_array = np.concatenate((data_array, tmp))
        users = np.concatenate((users, idx * np.ones(tmp.shape[0])))

    # Majority vote over the label column of each window
    y = np.empty(users.shape)
    for idx in range(users.shape[0]):
        y[idx] = np.argmax(np.bincount(data_array[idx, :, -1].astype('int')))

    # Keep only the accelerometer channels
    data_array = data_array[:, :, :3]

    data_array, stats = self.add_features(data_array,
                                          normalise=self.normalize,
                                          ratio=0,
                                          add_roll=self.add_roll,
                                          add_pitch=self.add_pitch,
                                          add_filter=self.add_filter,
                                          comp_magnitude=self.comp_magnitude)
    y = self.map_to_common_activities(y, activity_map)
    return data_array, y.astype('int'), self.name, users, stats
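
# self.map_to_common_activities(y, activity_map) is defined elsewhere. The function below is a
# hypothetical sketch of the behaviour the loaders appear to assume (not the actual
# implementation): dataset-specific integer labels are mapped to common activity names via
# `activity_map`, and those names are mapped to shared integer ids via the module-level
# MAP_ACTIVITY dictionary.
def _map_to_common_activities_sketch(y, activity_map):
    return np.asarray([MAP_ACTIVITY[activity_map[int(label)]] for label in y])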
def concat_sequence(x, window, step):
    """Flatten windowed data of shape (n_windows, n_samples, n_features) into one continuous
    sequence and re-window it with the given window length and step."""
    return rolling_window(x.reshape(-1, x.shape[-1]).swapaxes(0, 1), window, step)\
        .swapaxes(0, 1).swapaxes(1, 2)
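
# Hypothetical usage sketch for concat_sequence: stitch 100 already-windowed segments of
# 64 samples x 3 axes back into one sequence and re-cut it into longer overlapping windows.
# The window and step values below are illustrative assumptions, and the exact output shape
# depends on the rolling_window helper, which is not defined in this file.
def _concat_sequence_example():
    x = np.random.randn(100, 64, 3)                 # (n_windows, n_samples, n_features)
    return concat_sequence(x, window=128, step=64)  # longer windows over the flat sequence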