def do_ssl_for_one_column(data, label_index, feat_indices, one_hot_cols,
                          min_max_cols, unlabel_num, test_size=895):
    """
    Does semi-supervised learning experiments with the selected column as
    labels.

    :param data: original data
    :param label_index: label index
    :param feat_indices: feature indices
    :param one_hot_cols: columns for one-hot encoding
    :param min_max_cols: columns for min-max normalization
    :param unlabel_num: number of unlabeled data points
    :param test_size: size of the test split handed to ``get_tr_te_set``;
        defaults to 895, the value that used to be hard-coded here
    """
    # Separate the label column from the feature columns.
    data_X, data_y = split_feat_and_label(data=data,
                                          label_index=label_index,
                                          feat_indices=feat_indices)

    # Encode / normalize the feature columns.
    data_X = one_hot_preprocess_sl(data_X=data_X,
                                   one_hot_cols=one_hot_cols,
                                   min_max_cols=min_max_cols)

    train_X, train_y, test_X, test_y = get_tr_te_set(data_X=data_X,
                                                     data_y=data_y,
                                                     test_size=test_size)
    # Shapes recorded by the original author for the default split:
    #   train_X: (4000, 32), train_y: (4000,)
    #   test_X: (895, 32), test_y: (895,)

    # The label index doubles as the experiment name.
    do_ssl_experiments(train_X=train_X, train_y=train_y,
                       unlabel_num=unlabel_num,
                       test_X=test_X, test_y=test_y,
                       label_name=str(label_index))
def do_sl_for_one_column(data, label_index, feat_indices, one_hot_cols,
                         min_max_cols, test_size=895):
    """
    Does supervised learning experiments with the selected column as labels.

    Two rounds of experiments are run: one on all feature columns and one
    with the only numerical feature removed (named with a ``_nm`` suffix).

    :param data: original data.
    :param label_index: label index.
    :param feat_indices: feature indices.
    :param one_hot_cols: columns for one-hot encoding.
    :param min_max_cols: columns for min-max normalization.
    :param test_size: size of the test split handed to ``get_tr_te_set``;
        defaults to 895, the value that used to be hard-coded here.
    """
    # Separate the label column from the feature columns.
    data_X, data_y = split_feat_and_label(data=data,
                                          label_index=label_index,
                                          feat_indices=feat_indices)

    # Encode / normalize the feature columns.
    data_X = one_hot_preprocess_sl(data_X=data_X,
                                   one_hot_cols=one_hot_cols,
                                   min_max_cols=min_max_cols)

    train_X, train_y, test_X, test_y = get_tr_te_set(data_X=data_X,
                                                     data_y=data_y,
                                                     test_size=test_size)
    # Shapes recorded by the original author for the default split:
    #   train_X: (4000, 32), train_y: (4000,)
    #   test_X: (895, 32), test_y: (895,)

    label_name = str(label_index)

    # Normal experiments
    do_sl_experiments(train_X=train_X, train_y=train_y,
                      test_X=test_X, test_y=test_y,
                      label_name=label_name)

    # Experiments for deleting the only numerical feature.
    # NOTE(review): the 0:31 slice keeps the first 31 columns, so it relies
    # on the 32-column layout noted above with the numerical feature last --
    # confirm if the preprocessing ever changes.
    do_sl_experiments(train_X=train_X[:, 0:31], train_y=train_y,
                      test_X=test_X[:, 0:31], test_y=test_y,
                      label_name=label_name + '_nm')
def do_tl_for_one_column(data, label_index, feat_indices, split_col,
                         one_hot_cols, min_max_cols, test_size):
    """
    Runs the transfer learning experiments using the selected column as the
    label.

    :param data: original data
    :param label_index: label index
    :param feat_indices: feature indices
    :param split_col: column to be split for transfer learning
    :param one_hot_cols: columns for one-hot encoding
    :param min_max_cols: columns for min-max normalization
    :param test_size: size of test data for target
    """
    # Step 1: pull the label column apart from the feature columns.
    features, labels = split_feat_and_label(
        data=data,
        label_index=label_index,
        feat_indices=feat_indices,
    )

    # Step 2: preprocess and divide into source / target parts on split_col.
    preprocessed = one_hot_preprocess_tl(
        data_X=features,
        data_y=labels,
        split_col=split_col,
        one_hot_cols=one_hot_cols,
        min_max_cols=min_max_cols,
    )
    src_feats, src_labels, tgt_feats, tgt_labels = preprocessed

    # Step 3: build the source set and the target train / test sets.
    splits = get_so_ta_set(
        prepro_source_X=src_feats,
        source_data_y=src_labels,
        prepro_target_X=tgt_feats,
        target_data_y=tgt_labels,
        test_size=test_size,
    )
    so_X, so_y, ta_train_X, ta_train_y, ta_test_X, ta_test_y = splits

    # Step 4: run the experiments, named after the label and split columns.
    do_tl_experiments(
        source_X=so_X,
        source_y=so_y,
        target_tr_X=ta_train_X,
        target_tr_y=ta_train_y,
        target_te_X=ta_test_X,
        target_te_y=ta_test_y,
        label_name=str(label_index),
        split_name=str(split_col),
    )
def _sl_model_filenames(label_name):
    """
    Builds the keyword arguments naming the saved supervised-learning model
    files for one experiment.

    :param label_name: experiment name placed between the model name and the
        ``.pkl`` extension (e.g. ``"3"`` or ``"3_nm"``).
    :return: dict mapping each ``*_filename`` keyword of ``get_sl_results``
        to its path under ``saved_models/sl/``.
    """
    return {
        "lp_filename":
            "saved_models/sl/linear perceptron " + label_name + ".pkl",
        "gnp_filename":
            "saved_models/sl/Gaussian Naive Bayes " + label_name + ".pkl",
        "svc_filename":
            "saved_models/sl/support vector machine " + label_name + ".pkl",
        "lgr_filename":
            "saved_models/sl/logistic regression " + label_name + ".pkl",
        "rf_filename":
            "saved_models/sl/random forest " + label_name + ".pkl",
        "adb_filename":
            "saved_models/sl/AdaBoost " + label_name + ".pkl",
    }


def get_sl_for_one_column(data, label_index, feat_indices, one_hot_cols,
                          min_max_cols, test_size=895):
    """
    Gets supervised learning results with the selected column as labels.

    Results are requested twice: once on all feature columns and once with
    the only numerical feature removed (named with a ``_nm`` suffix). Each
    request is given the matching model file names under
    ``saved_models/sl/``.

    :param data: original data.
    :param label_index: label index.
    :param feat_indices: feature indices.
    :param one_hot_cols: columns for one-hot encoding.
    :param min_max_cols: columns for min-max normalization.
    :param test_size: size of the test split handed to ``get_tr_te_set``;
        defaults to 895, the value that used to be hard-coded here.
    """
    # Separate the label column from the feature columns.
    data_X, data_y = split_feat_and_label(data=data,
                                          label_index=label_index,
                                          feat_indices=feat_indices)

    # Encode / normalize the feature columns.
    data_X = one_hot_preprocess_sl(data_X=data_X,
                                   one_hot_cols=one_hot_cols,
                                   min_max_cols=min_max_cols)

    train_X, train_y, test_X, test_y = get_tr_te_set(data_X=data_X,
                                                     data_y=data_y,
                                                     test_size=test_size)
    # Shapes recorded by the original author for the default split:
    #   train_X: (4000, 32), train_y: (4000,)
    #   test_X: (895, 32), test_y: (895,)

    # Normal experiments
    label_name = str(label_index)
    get_sl_results(train_X=train_X, train_y=train_y,
                   test_X=test_X, test_y=test_y,
                   label_name=label_name,
                   **_sl_model_filenames(label_name))

    # Experiments for deleting the only numerical feature.
    # NOTE(review): the 0:31 slice keeps the first 31 columns, so it relies
    # on the 32-column layout noted above with the numerical feature last --
    # confirm if the preprocessing ever changes.
    label_name_nm = label_name + '_nm'
    get_sl_results(train_X=train_X[:, 0:31], train_y=train_y,
                   test_X=test_X[:, 0:31], test_y=test_y,
                   label_name=label_name_nm,
                   **_sl_model_filenames(label_name_nm))