def test_serialize_data_and_class_df():
    """End-to-end smoke test: load session 6 wrist data, segment, build
    features and class labels, then serialize both dataframes to pickles."""
    wrist = 'DW'
    raw, annot = s_loader.load_smoking_df(session=6, sensors=[wrist, ],
                                          kind='corrected')
    # window segmentation with 50% overlap
    segments = s_segment.do_segmentation_on_raw(
        raw, method='window',
        paras={'window_size': 200, 'overlap_rate': 0.5})
    segments = s_feature.do_preprocess_on_segment_raw(segments)
    seg_annot = s_segment.set_segmentation_on_annotation(annot, segments)
    info = s_feature.get_info_from_segment_annotation(
        seg_annot, s_info.feature_info_dict)
    labels = assign_class_from_feature_annotation(info)
    # per-sensor feature dictionary lives next to the feature dataset
    func_dict_path = "%s/feature_dict_%s.json" % (
        s_info.feature_dataset_folder, wrist)
    features = s_feature.get_features_from_segment_raw(segments,
                                                       func_dict_path)
    data = get_data_df_from_feature_raw_df(features)
    data_pkl_name, class_pkl_name = serialize_data_and_class_df(data, labels)
def test_pre_exclude_rest_instances():
    """Smoke test of pre_exclude_rest_instances on session 6 wrist data;
    prints the class dataframe split into rest (-1) and non-rest rows."""
    wrist = 'DW'
    raw, annot = s_loader.load_smoking_df(session=6, sensors=[wrist, ],
                                          kind='corrected')
    segments = s_segment.do_segmentation_on_raw(
        raw, method='window',
        paras={'window_size': 320, 'overlap_rate': 0.5})
    segments = s_feature.do_preprocess_on_segment_raw(segments)
    seg_annot = s_segment.set_segmentation_on_annotation(annot, segments)
    info = s_feature.get_info_from_segment_annotation(
        seg_annot, s_info.feature_info_dict)
    labels = assign_class_from_feature_annotation(info)
    # exercise the function under test
    labels = pre_exclude_rest_instances(segments, class_df=labels)
    print("============Test result of pre-exclusion=================")
    print("============class dataframe comparation================")
    print(labels)
    print("=========rest segments===============")
    print(labels[labels[s_info.classnum_col] == -1])
    print("=========not rest segments===========")
    print(labels[labels[s_info.classnum_col] != -1])
def test_do_preprocess_on_segment_raw():
    """Smoke test of do_preprocess_on_segment_raw on session 1 wrist data;
    prints the segment heads before and after preprocessing for eyeballing."""
    wrist = 'DW'
    raw, _annot = s_loader.load_smoking_df(session=1, sensors=[wrist, ],
                                           kind='corrected')
    segments = s_segment.do_segmentation_on_raw(
        raw, method='window',
        paras={'window_size': 320, 'overlap_rate': 0.5})
    preprocessed = do_preprocess_on_segment_raw(segments)
    print("===============test results of preprocessing on segment raw==================")
    print(segments.head())
    print(preprocessed.head())
def test_get_info_from_segment_annotation():
    """Smoke test of get_info_from_segment_annotation on session 1 wrist
    data; prints the resulting feature-annotation dataframe and its ends."""
    wrist = 'DW'
    raw, annot = s_loader.load_smoking_df(session=1, sensors=[wrist, ],
                                          kind='corrected')
    segments = s_segment.do_segmentation_on_raw(
        raw, method='window',
        paras={'window_size': 320, 'overlap_rate': 0.5})
    seg_annot = s_segment.set_segmentation_on_annotation(annot, segments)
    info = get_info_from_segment_annotation(seg_annot,
                                            s_info.feature_info_dict)
    print("===================test feature construction result==================")
    print(info)
    print("===================feature dataframe head===========================")
    print(info.head().T)
    print("===================feature dataframe tail===========================")
    print(info.tail().T)
def test_get_features_from_segment_raw():
    """Smoke test of get_features_from_segment_raw on session 1 wrist data;
    prints the segmented raw and computed feature dataframes."""
    wrist = 'DW'
    raw, _annot = s_loader.load_smoking_df(session=1, sensors=[wrist, ],
                                           kind='corrected')
    segments = s_segment.do_segmentation_on_raw(
        raw, method='window',
        paras={'window_size': 320, 'overlap_rate': 0.5})
    # per-sensor feature dictionary lives next to the feature dataset
    func_dict_path = "%s/feature_dict_%s.json" % (
        s_info.feature_dataset_folder, wrist)
    features = get_features_from_segment_raw(segments, func_dict_path)
    print("===================test feature construction result==================")
    print(segments)
    print(features)
    print("===================feature dataframe head===========================")
    print(features.head().T)
    print("===================feature dataframe tail===========================")
    print(features.tail().T)
def test_assign_class_from_feature_annotation():
    """Smoke test of assign_class_from_feature_annotation on session 1
    wrist data (small 40-sample windows); prints the class dataframe."""
    wrist = 'DW'
    raw, annot = s_loader.load_smoking_df(session=1, sensors=[wrist, ],
                                          kind='corrected')
    segments = s_segment.do_segmentation_on_raw(
        raw, method='window',
        paras={'window_size': 40, 'overlap_rate': 0.5})
    seg_annot = s_segment.set_segmentation_on_annotation(annot, segments)
    info = s_feature.get_info_from_segment_annotation(
        seg_annot, s_info.feature_info_dict)
    labels = assign_class_from_feature_annotation(info)
    print("============Test result of assigning class=================")
    print(labels)
    print("============class dataframe head=================")
    print(labels.head().T)
    print("============class dataframe tail=================")
    print(labels.tail().T)
def build_whole_dataset_each_sensor_from_csv(sessions=None,
                                             sensors=None,
                                             raw_type="corrected",
                                             paras=None):
    """
    It will load raw data and annotation from csv files, compute features
    and merge different sessions them into a large dataset

    Load:
        1. Read in raw and annotation dataset
        2. Do segmentation
        3. Do feature construction and information computation
        4. Do class assignment and pre-exclusion
        5. Serialization
    Connect:
        1. Load

    Args:
        sessions: sessions in array to be used (default: empty list)
        sensors: sensor code in array to be used (default: empty list)
        raw_type: "raw", "clean" and "corrected"
        paras: hyperparameters: window_size and overlap_rate
               (default: {'window_size': 320, 'overlap_rate': 0.5})
    Return:
        data_pkl_names, class_pkl_names (dict of sensor -> list of pkl
        names), or None when sessions or sensors is empty
    """
    # FIX: the original used mutable default arguments ([], {}), which are
    # created once and shared across calls; use None sentinels instead.
    sessions = [] if sessions is None else sessions
    sensors = [] if sensors is None else sensors
    if paras is None:
        paras = {'window_size': 320, 'overlap_rate': 0.5}
    print("================refresh pkls==============================================================")
    print("sessions: %s, sensors: %s, raw_type: %s, paras: %s" % (
        sessions, sensors, raw_type, paras))
    if len(sessions) == 0 or len(sensors) == 0:
        return None
    data_pkl_names = {}
    class_pkl_names = {}
    for sensor in sensors:
        data_pkl_names[sensor] = []
        class_pkl_names[sensor] = []
    # load from csv, get data and class dataframes per session/sensor
    for session in sessions:
        raw_df, annotation_df = s_loader.load_smoking_df(session=session,
                                                         sensors=sensors,
                                                         kind=raw_type)
        # groupby sensors
        grouped_raw = raw_df.groupby(s_info.sensor_col)
        grouped_annotation = annotation_df.groupby(s_info.sensor_col)
        for sensor, group_raw in grouped_raw:
            group_annotation = grouped_annotation.get_group(sensor)
            # do segmentation on raw
            seg_raw_df = s_segment.do_segmentation_on_raw(group_raw,
                                                          method='window',
                                                          paras=paras)
            # do preprocessing on raw
            seg_raw_df = s_feature.do_preprocess_on_segment_raw(seg_raw_df)
            # do segmentation on annotation
            seg_annotation_df = s_segment.set_segmentation_on_annotation(
                group_annotation, seg_raw_df)
            # do info construction on annotation
            feature_annotation_df = s_feature.get_info_from_segment_annotation(
                seg_annotation_df, s_info.feature_info_dict)
            # do class assignment on annotation
            class_df = s_prepare.assign_class_from_feature_annotation(
                feature_annotation_df)
            # do pre-exclusion on class dataframe
            class_df = s_prepare.pre_exclude_rest_instances(seg_raw_df,
                                                            class_df=class_df)
            # do feature construction on segment raw
            feature_func_dict = s_info.feature_dataset_folder + \
                "/feature_dict_" + sensor + '.json'
            feature_raw_df = s_feature.get_features_from_segment_raw(
                seg_raw_df, feature_func_dict)
            data_df = s_prepare.get_data_df_from_feature_raw_df(feature_raw_df)
            # serialize data and class df
            data_pkl, class_pkl = s_prepare.serialize_data_and_class_df(
                data_df, class_df)
            data_pkl_names[sensor].append(data_pkl)
            class_pkl_names[sensor].append(class_pkl)
            print("")
    # connect together: merge the per-session pkls into one dataset
    if len(sessions) > 1:
        data_pkl_names, class_pkl_names = build_whole_dataset_each_sensor_from_pkl(
            sessions, sensors, paras['window_size'])
    return data_pkl_names, class_pkl_names