예제 #1
0
def test_serialize_data_and_class_df():
    """Smoke test for serialize_data_and_class_df on session 6, sensor 'DW'.

    Builds the class dataframe (from the segmented annotation) and the data
    dataframe (from the features of the segmented raw data) and hands both to
    serialize_data_and_class_df. Nothing is asserted; the test only checks
    that the whole chain runs and that the serializer returns two values.
    """
    sensor = 'DW'
    window_paras = {'window_size': 200, 'overlap_rate': 0.5}

    raw_df, annotation_df = s_loader.load_smoking_df(
        session=6, sensors=[sensor], kind='corrected')

    # Raw side: window segmentation followed by preprocessing.
    segments = s_segment.do_segmentation_on_raw(
        raw_df, method='window', paras=window_paras)
    segments = s_feature.do_preprocess_on_segment_raw(segments)

    # Annotation side: segment against the raw segments, then derive classes.
    annotation_segments = s_segment.set_segmentation_on_annotation(
        annotation_df, segments)
    annotation_info = s_feature.get_info_from_segment_annotation(
        annotation_segments, s_info.feature_info_dict)
    class_df = assign_class_from_feature_annotation(annotation_info)

    # Path of the per-sensor feature definition file.
    feature_dict_path = (
        s_info.feature_dataset_folder + "/feature_dict_" + sensor + '.json')
    features = s_feature.get_features_from_segment_raw(
        segments, feature_dict_path)
    data_df = get_data_df_from_feature_raw_df(features)

    # Unpacking doubles as a check that exactly two names are returned.
    data_pkl_name, class_pkl_name = serialize_data_and_class_df(
        data_df, class_df)
예제 #2
0
def test_pre_exclude_rest_instances():
    """Run pre_exclude_rest_instances on session 6, sensor 'DW', and print the result.

    The class dataframe is built from the segmented annotation, passed through
    pre_exclude_rest_instances together with the raw segments, and then
    printed in full and split by whether the s_info.classnum_col value is -1.
    Nothing is asserted; the output is meant for manual inspection.
    """
    sensor = 'DW'
    window_paras = {'window_size': 320, 'overlap_rate': 0.5}

    raw_df, annotation_df = s_loader.load_smoking_df(
        session=6, sensors=[sensor], kind='corrected')
    segments = s_segment.do_segmentation_on_raw(
        raw_df, method='window', paras=window_paras)
    segments = s_feature.do_preprocess_on_segment_raw(segments)
    annotation_segments = s_segment.set_segmentation_on_annotation(
        annotation_df, segments)
    annotation_info = s_feature.get_info_from_segment_annotation(
        annotation_segments, s_info.feature_info_dict)
    class_df = assign_class_from_feature_annotation(annotation_info)

    # Function under test.
    class_df = pre_exclude_rest_instances(segments, class_df=class_df)

    # Single-argument parenthesized print behaves the same as the print
    # statement on Python 2 and is also valid on Python 3.
    print("============Test result of pre-exclusion=================")
    print("============class dataframe comparation================")
    print(class_df)
    print("=========rest segments===============")
    # Per the headers printed here, class number -1 marks rest segments.
    rest_mask = class_df[s_info.classnum_col] == -1
    print(class_df[rest_mask])
    print("=========not rest segments===========")
    print(class_df[class_df[s_info.classnum_col] != -1])
예제 #3
0
def test_do_preprocess_on_segment_raw():
    """Run do_preprocess_on_segment_raw on session 1, sensor 'DW', and print heads.

    Prints the first rows of the segmented raw dataframe and of the dataframe
    returned by the preprocessing step so they can be compared by eye.
    Nothing is asserted.
    """
    sensor = 'DW'
    window_paras = {'window_size': 320, 'overlap_rate': 0.5}

    raw_df, annotation_df = s_loader.load_smoking_df(
        session=1, sensors=[sensor], kind='corrected')
    segments = s_segment.do_segmentation_on_raw(
        raw_df, method='window', paras=window_paras)

    # Function under test.
    preprocessed = do_preprocess_on_segment_raw(segments)

    # Single-argument parenthesized print behaves the same as the print
    # statement on Python 2 and is also valid on Python 3.
    print("===============test results of preprocessing on segment raw==================")
    print(segments.head())
    print(preprocessed.head())
예제 #4
0
def test_get_info_from_segment_annotation():
    """Run get_info_from_segment_annotation on session 1, sensor 'DW', and print it.

    The annotation is segmented against the windowed raw data, the info
    dataframe is computed with s_info.feature_info_dict, and the result is
    printed in full plus its transposed head and tail. Nothing is asserted.
    """
    sensor = 'DW'
    window_paras = {'window_size': 320, 'overlap_rate': 0.5}

    raw_df, annotation_df = s_loader.load_smoking_df(
        session=1, sensors=[sensor], kind='corrected')
    segments = s_segment.do_segmentation_on_raw(
        raw_df, method='window', paras=window_paras)
    annotation_segments = s_segment.set_segmentation_on_annotation(
        annotation_df, segments)

    # Function under test.
    annotation_info = get_info_from_segment_annotation(
        annotation_segments, s_info.feature_info_dict)

    # Single-argument parenthesized print behaves the same as the print
    # statement on Python 2 and is also valid on Python 3.
    print("===================test feature construction result==================")
    print(annotation_info)
    print("===================feature dataframe head===========================")
    print(annotation_info.head().T)
    print("===================feature dataframe tail===========================")
    print(annotation_info.tail().T)
예제 #5
0
def test_get_features_from_segment_raw():
    """Run get_features_from_segment_raw on session 1, sensor 'DW', and print it.

    Features are computed from the windowed raw segments using the per-sensor
    feature definition file under s_info.feature_dataset_folder. The raw
    segments, the feature dataframe, and the transposed head and tail of the
    feature dataframe are printed. Nothing is asserted.
    """
    sensor = 'DW'
    window_paras = {'window_size': 320, 'overlap_rate': 0.5}

    raw_df, annotation_df = s_loader.load_smoking_df(
        session=1, sensors=[sensor], kind='corrected')
    segments = s_segment.do_segmentation_on_raw(
        raw_df, method='window', paras=window_paras)

    # Path of the per-sensor feature definition file.
    feature_dict_path = (
        s_info.feature_dataset_folder + "/feature_dict_" + sensor + '.json')

    # Function under test.
    features = get_features_from_segment_raw(segments, feature_dict_path)

    # Single-argument parenthesized print behaves the same as the print
    # statement on Python 2 and is also valid on Python 3.
    print("===================test feature construction result==================")
    print(segments)
    print(features)
    print("===================feature dataframe head===========================")
    print(features.head().T)
    print("===================feature dataframe tail===========================")
    print(features.tail().T)
예제 #6
0
def test_assign_class_from_feature_annotation():
    """Run assign_class_from_feature_annotation on session 1, sensor 'DW', and print it.

    Uses a window_size of 40 for segmentation, derives the annotation info
    dataframe and prints the resulting class dataframe in full plus its
    transposed head and tail. Nothing is asserted.
    """
    sensor = 'DW'
    window_paras = {'window_size': 40, 'overlap_rate': 0.5}

    raw_df, annotation_df = s_loader.load_smoking_df(
        session=1, sensors=[sensor], kind='corrected')
    segments = s_segment.do_segmentation_on_raw(
        raw_df, method='window', paras=window_paras)
    annotation_segments = s_segment.set_segmentation_on_annotation(
        annotation_df, segments)
    annotation_info = s_feature.get_info_from_segment_annotation(
        annotation_segments, s_info.feature_info_dict)

    # Function under test.
    class_df = assign_class_from_feature_annotation(annotation_info)

    # Single-argument parenthesized print behaves the same as the print
    # statement on Python 2 and is also valid on Python 3.
    print("============Test result of assigning class=================")
    print(class_df)
    print("============class dataframe head=================")
    print(class_df.head().T)
    print("============class dataframe tail=================")
    print(class_df.tail().T)
예제 #7
0
def build_whole_dataset_each_sensor_from_csv(sessions=None,
                                             sensors=None,
                                             raw_type="corrected",
                                             paras=None):
    """Build per-sensor data/class pickles from the csv files of the given sessions.

    For every session the raw and annotation data are loaded with
    s_loader.load_smoking_df and grouped by s_info.sensor_col. Each sensor
    group then goes through:

      1. Window segmentation of the raw data
      2. Preprocessing of the raw segments
      3. Segmentation of the annotation against the raw segments
      4. Info computation on the segmented annotation
      5. Class assignment and pre-exclusion of rest instances
      6. Feature construction from the raw segments
      7. Serialization of the data and class dataframes

    When more than one session is given, the per-session results are handed
    over to build_whole_dataset_each_sensor_from_pkl (presumably to connect
    the sessions into one dataset per sensor) and its return value is used.

    Args:
      sessions: sessions in array to be used; an empty list when omitted
      sensors: sensor codes in array to be used; an empty list when omitted
      raw_type: "raw", "clean" or "corrected"; passed as `kind` to the loader
      paras: hyperparameters window_size and overlap_rate; when omitted,
        {'window_size': 320, 'overlap_rate': 0.5} is used
    Return:
      None when sessions or sensors is empty. Otherwise the pair
      (data_pkl_names, class_pkl_names): with a single session these are
      dicts mapping each sensor to the list of names returned by
      s_prepare.serialize_data_and_class_df; with several sessions they are
      whatever build_whole_dataset_each_sensor_from_pkl returns.
    """
    # Defaults are created per call instead of in the signature: a mutable
    # default object is shared between calls, so any mutation of it (e.g. of
    # `paras` by a callee) would silently leak into later calls.
    if sessions is None:
        sessions = []
    if sensors is None:
        sensors = []
    if paras is None:
        paras = {'window_size': 320, 'overlap_rate': 0.5}

    # Single-argument parenthesized print works on both Python 2 and 3.
    print("================refresh pkls==============================================================")
    print("sessions: %s, sensors: %s, raw_type: %s, paras: %s" % (
        sessions, sensors, raw_type, paras))
    if len(sessions) == 0 or len(sensors) == 0:
        return None

    # One independent list of pickle names per requested sensor.
    data_pkl_names = {sensor: [] for sensor in sensors}
    class_pkl_names = {sensor: [] for sensor in sensors}

    # load from csv, one session at a time
    for session in sessions:
        raw_df, annotation_df = s_loader.load_smoking_df(session=session,
                                                         sensors=sensors,
                                                         kind=raw_type)
        # groupby sensors
        grouped_raw = raw_df.groupby(s_info.sensor_col)
        grouped_annotation = annotation_df.groupby(s_info.sensor_col)
        for sensor, group_raw in grouped_raw:
            group_annotation = grouped_annotation.get_group(sensor)
            # do segmentation on raw
            seg_raw_df = s_segment.do_segmentation_on_raw(group_raw,
                                                          method='window',
                                                          paras=paras)
            # do preprocessing on raw
            seg_raw_df = s_feature.do_preprocess_on_segment_raw(seg_raw_df)
            # do segmentation on annotation
            seg_annotation_df = s_segment.set_segmentation_on_annotation(
                group_annotation, seg_raw_df)
            # do info construction on annotation
            feature_annotation_df = s_feature.get_info_from_segment_annotation(
                seg_annotation_df, s_info.feature_info_dict)
            # do class assignment on annotation
            class_df = s_prepare.assign_class_from_feature_annotation(
                feature_annotation_df)
            # do pre-exclusion on class dataframe
            class_df = s_prepare.pre_exclude_rest_instances(seg_raw_df,
                                                            class_df=class_df)
            # do feature construction on segment raw, using the per-sensor
            # feature definition file
            feature_func_dict = (s_info.feature_dataset_folder +
                                 "/feature_dict_" + sensor + '.json')
            feature_raw_df = s_feature.get_features_from_segment_raw(
                seg_raw_df, feature_func_dict)
            data_df = s_prepare.get_data_df_from_feature_raw_df(feature_raw_df)
            # serialize data and class df
            data_pkl, class_pkl = s_prepare.serialize_data_and_class_df(
                data_df, class_df)
            data_pkl_names[sensor].append(data_pkl)
            class_pkl_names[sensor].append(class_pkl)
        print("")

    # connect together; only needed when there is more than one session
    if len(sessions) > 1:
        data_pkl_names, class_pkl_names = build_whole_dataset_each_sensor_from_pkl(
            sessions, sensors, paras['window_size'])
    return data_pkl_names, class_pkl_names