Example #1
def do_ssl_for_one_column(data, label_index, feat_indices, one_hot_cols,
                          min_max_cols, unlabel_num):
    """
       Runs semi-supervised learning experiments with the selected column as the label.
    :param data: original data
    :param label_index: label index
    :param feat_indices: feature indices
    :param one_hot_cols: columns for one-hot encoding
    :param min_max_cols: columns for min-max normalization
    :param unlabel_num: number of unlabeled data points
    """
    data_X, data_y = split_feat_and_label(data=data,
                                          label_index=label_index,
                                          feat_indices=feat_indices)
    data_X = one_hot_preprocess_sl(data_X=data_X,
                                   one_hot_cols=one_hot_cols,
                                   min_max_cols=min_max_cols)
    train_X, train_y, test_X, test_y = get_tr_te_set(data_X=data_X,
                                                     data_y=data_y,
                                                     test_size=895)
    """
       train_X: (4000, 32), train_y: (4000,)
       test_X: (895, 32), test_y: (895,)
    """
    do_ssl_experiments(train_X=train_X,
                       train_y=train_y,
                       unlabel_num=unlabel_num,
                       test_X=test_X,
                       test_y=test_y,
                       label_name=str(label_index))
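A minimal usage sketch, assuming the original data is already loaded as a NumPy array; the file path, column indices, and unlabeled count below are illustrative assumptions, not values from the original project:

import numpy as np

# Hypothetical input file and column layout -- adjust to the real dataset.
data = np.loadtxt("data/questionnaire.csv", delimiter=",")
label_index = 5                                  # column to predict (illustrative)
feat_indices = [i for i in range(data.shape[1]) if i != label_index]

do_ssl_for_one_column(data=data,
                      label_index=label_index,
                      feat_indices=feat_indices,
                      one_hot_cols=[0, 1, 2],    # categorical feature columns (illustrative)
                      min_max_cols=[3],          # numerical feature column (illustrative)
                      unlabel_num=2000)          # training points treated as unlabeled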
Example #2
def do_sl_for_one_column(data, label_index, feat_indices, one_hot_cols,
                         min_max_cols):
    """
       Runs supervised learning experiments with the selected column as the label.
    :param data: original data.
    :param label_index: label index.
    :param feat_indices: feature indices.
    :param one_hot_cols: columns for one-hot encoding.
    :param min_max_cols: columns for min-max normalization.
    """
    data_X, data_y = split_feat_and_label(data=data,
                                          label_index=label_index,
                                          feat_indices=feat_indices)
    data_X = one_hot_preprocess_sl(data_X=data_X,
                                   one_hot_cols=one_hot_cols,
                                   min_max_cols=min_max_cols)
    train_X, train_y, test_X, test_y = get_tr_te_set(data_X=data_X,
                                                     data_y=data_y,
                                                     test_size=895)
    """
       train_X: (4000, 32), train_y: (4000,)
       test_X: (895, 32), test_y: (895,)
    """
    # Normal experiments
    do_sl_experiments(train_X=train_X,
                      train_y=train_y,
                      test_X=test_X,
                      test_y=test_y,
                      label_name=str(label_index))
    # Experiments for deleting the only numerical feature
    do_sl_experiments(train_X=train_X[:, 0:31],
                      train_y=train_y,
                      test_X=test_X[:, 0:31],
                      test_y=test_y,
                      label_name=str(label_index) + '_nm')
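A hedged driver sketch that loops the supervised experiments over several candidate label columns, reusing the data array from the sketch above; the column indices and preprocessing lists are illustrative assumptions:

# Illustrative: run the supervised experiments for each candidate label column.
for label_index in [4, 5, 6]:                    # hypothetical label columns
    feat_indices = [i for i in range(data.shape[1]) if i != label_index]
    do_sl_for_one_column(data=data,
                         label_index=label_index,
                         feat_indices=feat_indices,
                         one_hot_cols=[0, 1, 2], # illustrative
                         min_max_cols=[3])       # illustrative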
Example #3
def do_tl_for_one_column(data, label_index, feat_indices, split_col,
                         one_hot_cols, min_max_cols, test_size):
    """
        Runs transfer learning experiments with the selected column as the label.
    :param data: original data
    :param label_index: label index
    :param feat_indices: feature indices
    :param split_col: column to be split for transfer learning
    :param one_hot_cols: columns for one-hot encoding
    :param min_max_cols: columns for min-max normalization
    :param test_size: size of test data for target
    """
    data_X, data_y = split_feat_and_label(data=data,
                                          label_index=label_index,
                                          feat_indices=feat_indices)
    prepro_source_X, source_data_y, prepro_target_X, target_data_y = \
        one_hot_preprocess_tl(data_X=data_X, data_y=data_y, split_col=split_col,
                              one_hot_cols=one_hot_cols, min_max_cols=min_max_cols)
    source_X, source_y, target_tr_X, target_tr_y, target_te_X, target_te_y = \
        get_so_ta_set(prepro_source_X=prepro_source_X, source_data_y=source_data_y,
                      prepro_target_X=prepro_target_X, target_data_y=target_data_y,
                      test_size=test_size)

    do_tl_experiments(source_X=source_X,
                      source_y=source_y,
                      target_tr_X=target_tr_X,
                      target_tr_y=target_tr_y,
                      target_te_X=target_te_X,
                      target_te_y=target_te_y,
                      label_name=str(label_index),
                      split_name=str(split_col))
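A usage sketch for the transfer-learning variant: split_col picks the column whose values separate the source domain from the target domain, and test_size is the number of held-out target rows. All indices and sizes below are illustrative assumptions:

label_index = 5                                  # illustrative label column
feat_indices = [i for i in range(data.shape[1]) if i != label_index]
do_tl_for_one_column(data=data,                  # loaded as in the first sketch
                     label_index=label_index,
                     feat_indices=feat_indices,
                     split_col=2,                # illustrative domain-split column
                     one_hot_cols=[0, 1],        # illustrative
                     min_max_cols=[3],           # illustrative
                     test_size=300)              # illustrative number of target test rows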
Example #4
def get_sl_for_one_column(data, label_index, feat_indices, one_hot_cols,
                          min_max_cols):
    """
       Gets supervised learning results with the selected column as the label.
    :param data: original data.
    :param label_index: label index.
    :param feat_indices: feature indices.
    :param one_hot_cols: columns for one-hot encoding.
    :param min_max_cols: columns for min-max normalization.
    """
    data_X, data_y = split_feat_and_label(data=data,
                                          label_index=label_index,
                                          feat_indices=feat_indices)
    data_X = one_hot_preprocess_sl(data_X=data_X,
                                   one_hot_cols=one_hot_cols,
                                   min_max_cols=min_max_cols)
    train_X, train_y, test_X, test_y = get_tr_te_set(data_X=data_X,
                                                     data_y=data_y,
                                                     test_size=895)
    """
       train_X: (4000, 32), train_y: (4000,)
       test_X: (895, 32), test_y: (895,)
    """
    # Normal experiments
    lp_filename = f"saved_models/sl/linear perceptron {label_index}.pkl"
    gnp_filename = f"saved_models/sl/Gaussian Naive Bayes {label_index}.pkl"
    svc_filename = f"saved_models/sl/support vector machine {label_index}.pkl"
    lgr_filename = f"saved_models/sl/logistic regression {label_index}.pkl"
    rf_filename = f"saved_models/sl/random forest {label_index}.pkl"
    adb_filename = f"saved_models/sl/AdaBoost {label_index}.pkl"
    get_sl_results(train_X=train_X,
                   train_y=train_y,
                   test_X=test_X,
                   test_y=test_y,
                   label_name=str(label_index),
                   lp_filename=lp_filename,
                   gnp_filename=gnp_filename,
                   svc_filename=svc_filename,
                   lgr_filename=lgr_filename,
                   rf_filename=rf_filename,
                   adb_filename=adb_filename)
    # Experiments for deleting the only numerical feature
    lp_filename_nm = f"saved_models/sl/linear perceptron {label_index}_nm.pkl"
    gnp_filename_nm = f"saved_models/sl/Gaussian Naive Bayes {label_index}_nm.pkl"
    svc_filename_nm = f"saved_models/sl/support vector machine {label_index}_nm.pkl"
    lgr_filename_nm = f"saved_models/sl/logistic regression {label_index}_nm.pkl"
    rf_filename_nm = f"saved_models/sl/random forest {label_index}_nm.pkl"
    adb_filename_nm = f"saved_models/sl/AdaBoost {label_index}_nm.pkl"
    get_sl_results(train_X=train_X[:, 0:31],
                   train_y=train_y,
                   test_X=test_X[:, 0:31],
                   test_y=test_y,
                   label_name=str(label_index) + '_nm',
                   lp_filename=lp_filename_nm,
                   gnp_filename=gnp_filename_nm,
                   svc_filename=svc_filename_nm,
                   lgr_filename=lgr_filename_nm,
                   rf_filename=rf_filename_nm,
                   adb_filename=adb_filename_nm)
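get_sl_for_one_column writes pickled models under saved_models/sl/, so that directory has to exist before the call; a hedged preparation sketch (the directory handling and argument values are assumptions, not part of the original code):

import os

os.makedirs("saved_models/sl", exist_ok=True)    # make sure the output directory exists
get_sl_for_one_column(data=data,                 # loaded as in the first sketch
                      label_index=5,             # illustrative
                      feat_indices=feat_indices,
                      one_hot_cols=[0, 1, 2],    # illustrative
                      min_max_cols=[3])          # illustrative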