Ejemplo n.º 1
0
def run_feature_projected_classification(train_x_matrix,
                                         train_y_vector,
                                         test_x_matrix,
                                         test_y_vector,
                                         feature_array,
                                         top_k,
                                         method,
                                         class_id=-1,
                                         logger=None):
    """Run one binary (one-vs-rest) classifier per class on projected features.

    For every class ``i`` in the evaluated range the labels are binarised
    (1 where the label equals ``i``, 0 otherwise), the third axis of the
    input matrices is restricted to the first ``top_k`` entries of
    ``feature_array[i]``, the result is flattened to 2-D and handed to the
    classifier selected by ``method``.

    Args:
        train_x_matrix: 4-D array unpacked as
            ``(train_row, attr_len, attr_num, input_map)``.
        train_y_vector: training labels; compared against the class id.
        test_x_matrix: 4-D array with the same layout as ``train_x_matrix``.
        test_y_vector: test labels.
        feature_array: 2-D array; row ``i`` holds the indices (into axis 2
            of the x matrices) selected for class ``i``.
        top_k: number of leading entries of each feature row to keep.
        method: one of ``'knn'``, ``'rf'`` or ``'libsvm'``.
        class_id: ``-1`` to evaluate every class from ``min(train_y_vector)``
            to ``max(train_y_vector)``; otherwise only that class.
        logger: logger to report to; ``init_logging('')`` is used when None.

    Returns:
        Tuple ``(all_accuracy, all_f1_value, all_predict_y, all_train_time,
        all_test_time, all_predict_matrix)``. The three ``all_*`` lists have
        one entry per evaluated class; ``all_predict_matrix`` has shape
        ``(test_row, real_num_classes)`` and column ``i`` holds column 1 of
        the probabilities returned for class ``i``.

    Raises:
        ValueError: if ``method`` is not a supported classifier name.
    """
    # Fail fast: previously an unknown method skipped every branch below and
    # crashed later on an unbound local instead of reporting the real cause.
    supported_methods = ('knn', 'rf', 'libsvm')
    if method not in supported_methods:
        raise ValueError("unsupported method " + repr(method) +
                         "; expected one of " + str(supported_methods))

    if logger is None:
        logger = init_logging('')

    # Tuple unpacking doubles as a check that the inputs are 4-D / 2-D.
    train_row, _, _, _ = train_x_matrix.shape
    test_row, attr_len, _, input_map = test_x_matrix.shape
    real_num_classes, _ = feature_array.shape
    all_predict_matrix = np.zeros((test_row, real_num_classes))

    if class_id == -1:
        min_class = min(train_y_vector)
        max_class = max(train_y_vector) + 1
    else:
        min_class = class_id
        max_class = class_id + 1

    # Fixed hyper-parameters forwarded to the classifier helpers.
    n_neighbors = 1
    samples_leaf = 20
    prob = True

    all_f1_value = []
    all_train_time = []
    all_test_time = []
    for i in range(min_class, max_class):
        logger.info('class: ' + str(i))
        # One-vs-rest labels: 1 for the current class, 0 for everything else.
        temp_train_y_vector = np.where(train_y_vector == i, 1, 0)
        temp_test_y_vector = np.where(test_y_vector == i, 1, 0)

        fold_positive_len = len(np.where(temp_train_y_vector == 1)[0])
        fold_negative_len = len(temp_train_y_vector) - fold_positive_len

        logger.info("=====")
        logger.info("positive class labels length: " + str(fold_positive_len))
        logger.info("negative class labels length: " + str(fold_negative_len))
        class_feature = feature_array[i][0:top_k]
        logger.info("feature list: " + str(class_feature))

        # Size the flattened width from the features actually selected so a
        # feature row shorter than top_k no longer breaks the reshape.
        feature_col = attr_len * len(class_feature) * input_map
        temp_train_x_matrix = train_x_matrix[:, :, class_feature, :].reshape(
            train_row, feature_col)
        temp_test_x_matrix = test_x_matrix[:, :, class_feature, :].reshape(
            test_row, feature_col)

        # The accuracy returned by the helpers is discarded; it is recomputed
        # below together with the F1 score.
        if method == 'knn':
            _, class_predict_y, class_predict_prob, class_train_time, class_test_time = run_knn(
                temp_train_x_matrix, temp_train_y_vector, temp_test_x_matrix,
                temp_test_y_vector, n_neighbors, prob)
        elif method == 'rf':
            _, class_predict_y, class_predict_prob, class_train_time, class_test_time = run_rf(
                temp_train_x_matrix, temp_train_y_vector, temp_test_x_matrix,
                temp_test_y_vector, samples_leaf, prob)
        else:
            # 'libsvm' (guaranteed by the validation above). The meaning of
            # the trailing '' and True arguments is defined by run_libsvm.
            _, class_predict_y, class_predict_prob, class_train_time, class_test_time = run_libsvm(
                temp_train_x_matrix, temp_train_y_vector, temp_test_x_matrix,
                temp_test_y_vector, logger, prob, '', True)

        class_accuracy, precision, recall, class_f1, tp, fp, tn, fn = f1_value_precision_recall_accuracy(
            class_predict_y, temp_test_y_vector, 1)

        logger.info(method + " f1 for class " + str(i) + ": " + str(class_f1))
        logger.info(method + " accuracy for class " + str(i) + ": " +
                    str(class_accuracy))

        all_f1_value.append(class_f1)
        all_train_time.append(class_train_time)
        all_test_time.append(class_test_time)
        # Column 1 of the per-class probabilities is stored as class i's score.
        all_predict_matrix[:, i] = class_predict_prob[:, 1]

    all_accuracy, all_predict_y = predict_matrix_with_prob_to_predict_accuracy(
        all_predict_matrix, test_y_vector)
    return all_accuracy, all_f1_value, all_predict_y, all_train_time, all_test_time, all_predict_matrix
Ejemplo n.º 2
0
def run_feature_projected_cnn(train_x_matrix,
                              train_y_vector,
                              test_x_matrix,
                              test_y_vector,
                              data_stru,
                              cnn_setting,
                              feature_dict,
                              top_k,
                              saver_file_profix='',
                              class_id=-1,
                              logger=None):
    """Train one binary (one-vs-rest) CNN per class on projected features.

    For every class ``i`` in the evaluated range the labels are binarised
    (1 where the label equals ``i``, 0 otherwise), the third axis of the
    input matrices is restricted to the first ``top_k`` entries of
    ``feature_dict[i]`` and a two-class CNN is trained via ``cnn_train``.
    The flow graph is built once, on the first class, and its placeholders
    are reused for the remaining classes.

    Args:
        train_x_matrix: 4-D array unpacked as
            ``(train_row, attr_len, attr_num, input_map)``.
        train_y_vector: training labels; compared against the class id.
        test_x_matrix: 4-D array with the same layout as ``train_x_matrix``.
        test_y_vector: test labels.
        data_stru: object whose ``num_classes`` is temporarily set to 2 for
            the duration of the call and restored before returning.
        cnn_setting: object whose ``num_classes`` is set to 2 and whose
            ``feature_method`` is set to ``'none'`` (neither is restored);
            ``temp_obj_folder`` is read to build the saver file path.
        feature_dict: mapping (or indexable) from class id to the feature
            indices for that class.
        top_k: number of leading feature indices to keep per class.
        saver_file_profix: prefix for the per-class saver file name.
        class_id: ``-1`` to evaluate every class from ``min(train_y_vector)``
            to ``max(train_y_vector)``; otherwise only that class.
        logger: logger to report to; ``init_logging('')`` is used when None.

    Returns:
        Tuple ``(all_accuracy, all_f1_value, all_predict_y, all_train_time,
        all_test_time, all_predict_matrix)``. The three ``all_*`` lists have
        one entry per evaluated class; ``all_predict_matrix`` has shape
        ``(test_row, real_num_classes)`` and column ``i`` holds column 1 of
        the probabilities returned for class ``i``.
    """
    if logger is None:
        logger = init_logging('')
    method = 'cnn'

    # Every per-class model is binary, so the shared settings are switched
    # to two classes while this function runs.
    num_classes = 2
    real_num_classes = data_stru.num_classes
    data_stru.num_classes = num_classes
    cnn_setting.num_classes = num_classes
    cnn_setting.feature_method = 'none'

    try:
        # Tuple unpacking doubles as a check that the inputs are 4-D.
        test_row, _, _, input_map = test_x_matrix.shape
        all_predict_matrix = np.zeros((test_row, real_num_classes))

        if class_id == -1:
            min_class = min(train_y_vector)
            max_class = max(train_y_vector) + 1
        else:
            min_class = class_id
            max_class = class_id + 1

        class_prefix_base = saver_file_profix + '_class'

        keep_saver_file = ''
        all_f1_value = []
        all_train_time = []
        all_test_time = []
        for i in range(min_class, max_class):
            logger.info('class: ' + str(i))
            # One-vs-rest labels: 1 for the current class, 0 otherwise.
            temp_train_y_vector = np.where(train_y_vector == i, 1, 0)
            temp_test_y_vector = np.where(test_y_vector == i, 1, 0)
            class_saver_profix = class_prefix_base + str(i)

            fold_positive_len = len(np.where(temp_train_y_vector == 1)[0])
            fold_negative_len = len(temp_train_y_vector) - fold_positive_len

            logger.info("=====")
            logger.info("positive class labels length: " +
                        str(fold_positive_len))
            logger.info("negative class labels length: " +
                        str(fold_negative_len))
            class_feature = feature_dict[i][0:top_k]
            print("class: " + str(i))
            print("number of features: " + str(top_k))
            print("Top features list: " + str(class_feature))
            logger.info("Top feature list: " + str(class_feature))

            temp_train_x_matrix = train_x_matrix[:, :, class_feature, :]
            temp_test_x_matrix = test_x_matrix[:, :, class_feature, :]
            temp_train_y_matrix = y_vector_to_matrix(temp_train_y_vector,
                                                     num_classes)
            temp_test_y_matrix = y_vector_to_matrix(temp_test_y_vector,
                                                    num_classes)

            if i == min_class:
                # Build the graph once; later classes reuse the placeholders
                # and the saver-file suffix produced here.
                train_x_placeholder, output_y_placeholder, predict_y_prob, keep_prob_placeholder, keeped_feature_list, keep_saver_file = cnn_set_flow_graph(
                    data_stru, cnn_setting, input_map, False, logger)

            saver_file = (cnn_setting.temp_obj_folder + class_saver_profix +
                          keep_saver_file + "_top" + str(top_k))
            # Parenthesised single-argument form prints identically on
            # Python 2 and Python 3 (the bare statement broke Python 3).
            print(saver_file)
            class_eval_value, class_train_time, class_test_time, class_predict_prob, fold_saver_file, _ = cnn_train(
                temp_train_x_matrix, temp_train_y_matrix, temp_test_x_matrix,
                temp_test_y_matrix, num_classes, cnn_setting,
                train_x_placeholder, output_y_placeholder, predict_y_prob,
                keep_prob_placeholder, keeped_feature_list, saver_file, logger)

            class_predict_y = np.argmax(class_predict_prob, axis=1)
            class_accuracy, precision, recall, class_f1, tp, fp, tn, fn = f1_value_precision_recall_accuracy(
                class_predict_y, temp_test_y_vector, 1)
            # A NaN evaluation value from training zeroes the class F1.
            if str(class_eval_value) == 'nan':
                class_f1 = 0
            logger.info(method + " f1 for class " + str(i) + ": " +
                        str(class_f1))
            logger.info(method + " accuracy for class " + str(i) + ": " +
                        str(class_accuracy))
            logger.info(method + ' model saved: ' + fold_saver_file)
            all_f1_value.append(class_f1)
            all_train_time.append(class_train_time)
            all_test_time.append(class_test_time)
            # Column 1 of the per-class probabilities is class i's score.
            all_predict_matrix[:, i] = class_predict_prob[:, 1]

        all_accuracy, all_predict_y = predict_matrix_with_prob_to_predict_accuracy(
            all_predict_matrix, test_y_vector)
        return all_accuracy, all_f1_value, all_predict_y, all_train_time, all_test_time, all_predict_matrix
    finally:
        # Restore the caller's class count even when training fails, so a
        # shared data_stru is never left stuck in binary mode.
        data_stru.num_classes = real_num_classes