Exemple #1
0
def backward_multitime(
        train_x,
        train_y,
        test_x,
        test_y,
        n_selected_features,
        data_key="test",
        method="cnn",
        cnn_setting_file="../../parameters/cnn_model_parameter.txt",
        logger=None):
    """
    Greedy backward feature elimination for multivariate time series.

    The model is first evaluated on all features. Then, in every round, the
    model is re-evaluated once per remaining feature with that feature left
    out, and the feature whose removal yields the highest evaluation value is
    dropped for good. Rounds repeat until n_selected_features features remain.

    Input
    -----
    train_x: {3d numpy array matrix}, shape (n_samples, n_features, time_length)
        training data
    train_y: {1d numpy array vector}, shape (n_samples,)
        training class labels
    test_x: {3d numpy array matrix}, shape (n_samples, n_features, time_length)
        testing data
    test_y: {1d numpy array vector}, shape (n_samples,)
        testing class labels
    n_selected_features: {int}
        number of features to keep
    data_key: {str}
        folder name used to build the CNN saver file prefix
    method: {str}
        "cnn" or "rf"
    cnn_setting_file: {str}
        CNN parameter file, only read when method is "cnn"
    logger: {logger or None}
        when None, a logger is created through setup_logger("")

    Output
    ------
    F: {numpy array}, shape (n_selected_features, )
        index of selected features (all features when n_selected_features is
        not smaller than n_features)

    Raises
    ------
    ValueError
        when method is neither "cnn" nor "rf"
    """
    # Fail fast: an unknown method used to surface later as a NameError.
    if method not in ("cnn", "rf"):
        raise ValueError("Unsupported method: " + str(method))

    if logger is None:
        log_file = ""
        logger = setup_logger(log_file)

    _, n_features, time_length = train_x.shape

    eval_method = "f1"
    if method == "cnn":
        min_class = min(train_y)
        max_class = max(train_y)
        num_classes = max_class - min_class + 1
        # NOTE(review): data_stru is built for the full feature count and is
        # reused below for the reduced feature subsets - confirm that
        # model_evaluation_cnn copes with that.
        data_stru = data_structure(num_classes, min_class, n_features,
                                   time_length)
        cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
        logger.info('cnn setting:\n ' + cnn_setting.to_string())
        saver_file_profix = "../../object/" + data_key + "/backward_multitime/" + method
        saver_file_profix = init_folder(saver_file_profix)
        saver_file_profix = saver_file_profix + return_cnn_keyword(cnn_setting)
        eval_method = cnn_setting.eval_method
        # Only the first three returned values are needed here.
        all_f_eval_value, all_f_train_time, all_f_test_time = model_evaluation_cnn(
            train_x, train_y, test_x, test_y, data_stru, cnn_setting,
            saver_file_profix, logger)[:3]
    else:
        model = RandomForestClassifier(n_estimators=50, random_state=0)
        all_f_eval_value, all_f_train_time, all_f_test_time = model_evaluation_rf(
            train_x, train_y, test_x, test_y, model, logger)

    logger.info("With ALL Feature")
    logger.info(method + " " + eval_method + " Value For ALL Feature: " +
                str(all_f_eval_value))
    logger.info(method + " Training time (sec): " + str(all_f_train_time))
    logger.info(method + " Testing time (sec): " + str(all_f_test_time))

    # selected feature set, initialized to contain all features; it must be a
    # real list because features are removed from it (range() is not a list on
    # Python 3)
    F = list(range(n_features))
    count = n_features
    iter_num = 0
    n_keep = max(n_selected_features, 0)
    while count > n_keep:
        max_eval_value = -1
        idx = None
        # score vector of the current round only
        f_score = []
        for i in F:
            # Candidate subset without feature i; F itself is left untouched
            # so the columns keep their ascending order.
            remaining = [f for f in F if f != i]
            train_x_tmp = train_x[:, remaining, :]
            test_x_tmp = test_x[:, remaining, :]

            if method == "cnn":
                eval_value, train_run_time, test_run_time = model_evaluation_cnn(
                    train_x_tmp, train_y, test_x_tmp, test_y, data_stru,
                    cnn_setting, saver_file_profix, logger)[:3]
            else:
                eval_value, train_run_time, test_run_time = model_evaluation_rf(
                    train_x_tmp, train_y, test_x_tmp, test_y, model,
                    logger)
            # drop in evaluation value caused by leaving feature i out
            f_eval_value = all_f_eval_value - eval_value

            logger.info("Without Feature " + str(i) + ": ")
            logger.info(method + " " + eval_method + " Value For Feature " +
                        str(i) + ": " + str(f_eval_value))
            logger.info(method + " Training time (sec): " +
                        str(train_run_time))
            logger.info(method + " Testing time (sec): " +
                        str(test_run_time))
            f_score.append(f_eval_value)
            # record the feature whose removal results in the largest value
            if eval_value > max_eval_value:
                max_eval_value = eval_value
                idx = i
        logger.info("For iter " + str(iter_num))
        logger.info("Eval score vector: " + str(f_score))
        if idx is None:
            # No candidate produced a usable evaluation value.
            logger.info("No removable attribute found, stop early")
            break
        logger.info("The removed attribute is: " + str(idx))
        # delete the feature whose removal results in the largest value
        F.remove(idx)
        count -= 1
        iter_num = iter_num + 1
    return np.array(F)
Exemple #2
0
def cnn_load_main(parameter_file, file_keyword, function_keyword="cnn_classification"):
    """
    Load a saved CNN checkpoint and compare it with per-class classifiers
    trained on its last convolution output.

    For the first training file in the data folder whose name contains
    file_keyword, the function:
      1. reads the train/test data and finds the checkpoint in the model
         folder whose file name starts with the data file key,
      2. restores the model through load_model and fetches the first tensor of
         the returned feature list for both train and test data,
      3. computes the predictions and accuracy of the restored model on the
         test data,
      4. for every class, trains a two-class network (run_nn) on the kernels
         selected from ref_kernel_eval_matrix and keeps the score difference
         between the positive and negative output,
      5. predicts by arg-max over the per-class scores and prints both
         results.

    Only the first matching file is processed. The previous implementation
    stopped at that point through a bare undefined name (a NameError used as
    a debugging stop), and everything after it was unreachable. That
    unreachable code has been removed and the stop is now explicit.

    Input
    -----
    parameter_file: parameter file passed to read_all_feature_classification
    file_keyword: {str}
        only data files whose name contains this keyword are considered
    function_keyword: {str}
        section keyword passed to read_all_feature_classification

    Output
    ------
    None, results are printed

    Raises
    ------
    Exception
        when no model file starting with the data file key is found
    """
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file = read_all_feature_classification(parameter_file, function_keyword)

    print(data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file)

    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)

    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len, class_column)

    file_list = list_files(data_folder)

    # The data files are read with the class label in column 0 and a header,
    # regardless of the class_column value of the parameter file.
    class_column = 0
    header = True

    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.out_obj_folder = out_obj_folder
    cnn_setting.out_model_folder = out_model_folder
    cnn_setting.full_feature_num = 400
    init_folder(out_obj_folder)
    init_folder(out_model_folder)

    print(out_model_folder)
    model_file_list = list_files(out_model_folder)

    result_obj_folder = obj_folder + method +"_result_folder"
    result_obj_folder = init_folder(result_obj_folder)

    logger = setup_logger('')

    delimiter = ' '
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        file_key = train_file.replace('.txt', '')
        test_file = train_file.replace('train', 'test')

        data_group, attr_num = train_test_file_reading(data_folder + train_file, data_folder + test_file, '', class_column, delimiter, header)
        train_x_matrix = data_group.train_x_matrix
        train_y_vector = data_group.train_y_vector
        test_x_matrix = data_group.test_x_matrix
        test_y_vector = data_group.test_y_vector

        train_x_matrix = train_test_transpose(train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len, False)
        test_y_matrix = y_vector_to_matrix(test_y_vector, num_classes)

        # The checkpoint is matched by file-name prefix; the first hit wins.
        found_model_file = ""
        for model_file in model_file_list:
            if model_file.startswith(file_key):
                model_file = model_file.split('.')[0]
                found_model_file = out_model_folder + model_file + ".ckpt"
                break
        if found_model_file == "":
            raise Exception("No model object file found!!!")
        print(found_model_file)
        cnn_session, logits_out, train_x_placeholder, keep_prob_placeholder, keeped_feature_list = load_model(found_model_file, data_stru, cnn_setting, logger)

        # keep_prob is fed as 1.0 for every run below
        last_conv_tensor = keeped_feature_list[0]
        train_last_conv = cnn_session.run(last_conv_tensor, feed_dict={train_x_placeholder: train_x_matrix, keep_prob_placeholder: 1.0})
        test_last_conv = cnn_session.run(last_conv_tensor, feed_dict={train_x_placeholder: test_x_matrix, keep_prob_placeholder: 1.0})
        drop_num = 10
        # Debug output: second test instance before and after top_attr_x_matrix.
        print(np.squeeze(test_last_conv[1, :, :, :]))
        test_last_conv = top_attr_x_matrix(test_last_conv, drop_num)
        print(np.squeeze(test_last_conv[1, :, :, :]))
        train_last_conv = top_attr_x_matrix(train_last_conv, drop_num)

        # Predictions and accuracy of the restored model on the test data.
        output_y_placeholder = tf.placeholder(tf.float32, [None, num_classes])
        actual = tf.argmax(output_y_placeholder, axis=1)
        prediction = tf.argmax(logits_out, axis=1)
        correct_prediction = tf.equal(actual, prediction)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        ori_pred_y_vector = cnn_session.run(prediction, feed_dict={train_x_placeholder: test_x_matrix, keep_prob_placeholder: 1.0})
        test_accuracy = cnn_session.run(accuracy, feed_dict={train_x_placeholder: test_x_matrix, keep_prob_placeholder: 1.0, output_y_placeholder: test_y_matrix})
        # Everything below works on the fetched numpy arrays only.
        cnn_session.close()

        kernel_eval_matrix, ref_kernel_eval_matrix = last_conv_analysis(train_last_conv, train_y_vector)
        print(kernel_eval_matrix.shape)
        print(kernel_eval_matrix)
        train_ins_len = len(train_y_vector)
        test_ins_len = len(test_y_vector)

        # Hard-coded settings of the per-class networks.
        batch_size = 100
        layer_list = np.array([400])
        max_epoch = 10
        stop_threshold = 0.99
        activation_fun = 3
        std_value = 0.02
        eval_method = "acc"
        saver_file = './test_1.save'
        nn_setting = nn_parameters(layer_list, batch_size, max_epoch, stop_threshold, activation_fun, std_value, eval_method, saver_file)

        keep_num = 5
        all_pred_prob = []
        # NOTE(review): class labels are compared against 0..num_classes-1,
        # start_class is not taken into account - confirm labels start at 0.
        for c in range(num_classes):
            # one-vs-rest labels for class c
            train_y_vector_class = np.zeros((train_ins_len))
            train_y_vector_class[np.where(train_y_vector==c)[0]] = 1
            train_y_m_class = y_vector_to_matrix(train_y_vector_class, 2)

            test_y_vector_class = np.zeros((test_ins_len))
            test_y_vector_class[np.where(test_y_vector==c)[0]] = 1
            test_y_m_class = y_vector_to_matrix(test_y_vector_class, 2)

            top_kernel_index = kernel_eval_matrix[c, 0:keep_num]
            ref_kernel_index = ref_kernel_eval_matrix[c, 0:keep_num]
            print("kernel index " + str(top_kernel_index))
            print("ref kernel index " + str(ref_kernel_index))
            # The union of both selections is only printed for inspection.
            union_index = np.concatenate((top_kernel_index, ref_kernel_index), axis=0)
            print("union index " + str(union_index))
            print("unique index " + str(np.unique(union_index)))

            # Only the kernels of ref_kernel_eval_matrix are used as input.
            kernel_index = ref_kernel_index
            train_x_class = train_last_conv[:, :, :, kernel_index]
            test_x_class = test_last_conv[:, :, :, kernel_index]
            print(train_x_class.shape)
            # Flatten every instance; -1 replaces the former hard-coded
            # 45 * len(kernel_index) so other feature-map sizes work as well.
            train_x_class = train_x_class.reshape((train_ins_len, -1))
            test_x_class = test_x_class.reshape((test_ins_len, -1))

            c_eval_value, c_train_time, c_test_time, c_predict_proba = run_nn(train_x_class, train_y_m_class, test_x_class, test_y_m_class, nn_setting)
            # margin between the second and the first output column
            all_pred_prob.append(c_predict_proba[:, 1]-c_predict_proba[:, 0])
        all_pred_prob = np.array(all_pred_prob)
        print(all_pred_prob.shape)
        pred_vector = np.argmax(all_pred_prob, axis=0)
        print(pred_vector)
        # Debug output: per-class scores of the first (up to) three test
        # instances; guarded so that smaller test sets do not raise.
        for col in range(min(3, all_pred_prob.shape[1])):
            print(all_pred_prob[:, col])

        final_accuracy = accuracy_score(pred_vector, test_y_vector)

        avg_acc, ret_str = averaged_class_based_accuracy(ori_pred_y_vector, test_y_vector)
        print("original avg acc" + str(avg_acc))
        print("original accuracy: " + str(test_accuracy))
        print(ret_str)
        avg_acc, ret_str = averaged_class_based_accuracy(pred_vector, test_y_vector)
        print("avg acc" + str(avg_acc))
        print("new accuracy: " + str(final_accuracy))
        print(ret_str)

        load_result_analysis(all_pred_prob, test_y_vector)

        # Stop after the first matching file, as the previous implementation
        # did (it halted here with a NameError).
        break
def cnn_classification_main(parameter_file,
                            file_keyword,
                            function_keyword="cnn_classification"):
    """
    Train and evaluate a CNN for every training file matching file_keyword.

    The settings are read from parameter_file. For every file in the data
    folder whose name contains file_keyword, the matching "valid" and "test"
    files are looked up (by replacing 'train' in the file name; a missing file
    is passed on as an empty string), the data is read and preprocessed, a
    per-file logger is created and run_cnn is called. The class-averaged
    accuracy, the per-class report and the overall accuracy on the test data
    are logged.

    Input
    -----
    parameter_file: parameter file passed to read_all_feature_classification
    file_keyword: {str}
        only data files whose name contains this keyword are processed
    function_keyword: {str}
        section keyword passed to read_all_feature_classification, also part
        of the log file name

    Output
    ------
    None, results are written through the logger
    """
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file = read_all_feature_classification(
        parameter_file, function_keyword)

    print(data_keyword, data_folder, attr_num, attr_len, num_classes,
          start_class, class_column, class_id, obj_folder, method, log_folder,
          out_obj_folder, out_model_folder, cnn_setting_file)

    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)

    file_list = list_files(data_folder)
    # NOTE(review): file_count is never incremented, so the shape logging
    # below runs for every file.
    file_count = 0

    # The data files are read with the class label in column 0 and a header,
    # regardless of the class_column value of the parameter file.
    class_column = 0
    header = True

    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.out_obj_folder = out_obj_folder
    cnn_setting.out_model_folder = out_model_folder
    init_folder(out_obj_folder)
    init_folder(out_model_folder)

    result_obj_folder = obj_folder + method + "_result_folder"
    result_obj_folder = init_folder(result_obj_folder)

    delimiter = ' '
    loop_count = -1
    saver_file_profix = ""
    attention_type = -1
    cnn_setting.attention_type = attention_type
    trans_bool = False  # True: means ins * attr_len * 1 * attr_num
    # False: means ins * attr_len * attr_num * 1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        saver_file_profix = file_key + "_atten" + str(attention_type)

        # Missing validation / test files are signalled by an empty string.
        valid_file = data_folder + train_file.replace('train', 'valid')
        if not os.path.isfile(valid_file):
            valid_file = ''

        test_file = data_folder + train_file.replace('train', 'test')
        if not os.path.isfile(test_file):
            test_file = ''

        data_group, attr_num = train_test_file_reading(
            data_folder + train_file, test_file, valid_file, class_column,
            delimiter, header)
        data_group_processing(data_group, attr_num, trans_bool)
        data_stru = data_group.gene_data_stru()
        data_group.data_check(data_stru.num_classes, data_stru.min_class)

        # NOTE(review): the evaluation key uses num_classes from the parameter
        # file, while the log file name uses data_stru.num_classes - confirm
        # that both are meant to agree.
        if cnn_setting.eval_method == "accuracy":
            cnn_eval_key = "acc"
        elif num_classes > 2:
            cnn_eval_key = "acc_batch"
        else:
            cnn_eval_key = "f1"
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(
            data_stru.min_class
        ) + "_" + str(data_stru.num_classes) + "_act" + str(
            cnn_setting.activation_fun
        ) + "_" + cnn_eval_key + "_attention" + str(attention_type) + '.log'

        print("log file: " + log_file)

        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('cnn setting:\n ' + cnn_setting.to_string())
        logger.info('method: ' + method)
        logger.info('============')

        if file_count == 0:
            logger.info('train matrix shape: ' +
                        str(data_group.train_x_matrix.shape))
            logger.info('train label shape: ' +
                        str(data_group.train_y_vector.shape))

        logger.info(data_group.train_x_matrix[0, 0:3, 0:2, 0])
        pred_y_prob, train_run_time, test_run_time, cnn_model = run_cnn(
            cnn_setting, data_group, saver_file_profix, logger)
        pred_y_vector = np.argmax(pred_y_prob, axis=1)
        avg_acc, ret_str = averaged_class_based_accuracy(
            pred_y_vector, data_group.test_y_vector)
        # NOTE(review): the third positional argument is presumably
        # scikit-learn's `normalize`; newer releases only accept it as a
        # keyword - confirm against the installed version.
        acc_value = accuracy_score(data_group.test_y_vector, pred_y_vector,
                                   True)
        # The class-averaged accuracy is avg_acc; acc_value is the overall
        # accuracy and is reported as the fold evaluation value.
        logger.info("Averaged acc: " + str(avg_acc))
        logger.info(ret_str)
        logger.info("Fold eval value: " + str(acc_value))
        logger.info(method + ' fold training time (sec):' +
                    str(train_run_time))
        logger.info(method + ' fold testing time (sec):' + str(test_run_time))
        logger.info("save obj to " + cnn_model.saver_file)
Exemple #4
0
def fixed_width_forward_multitime(train_x, train_y, test_x, test_y, n_selected_features, keep_k=5, data_key="test", fold_key="", method="cnn", cnn_setting_file = "../../parameters/cnn_model_parameter.txt", logger=None, function_key="best_forward_multitime"):
    """
    Greedy forward feature selection with a fixed-width candidate pool.

    In the first round every feature is evaluated on its own; the best one is
    selected and the keep_k next best single features form the candidate
    pool. In every later round each pool member is evaluated together with
    the features selected so far, the best one is selected, and its place in
    the pool is taken by the next best single feature of the first round.
    Selection stops once n_selected_features features are selected or no
    candidate is left.

    Input
    -----
    train_x: {3d numpy array matrix}, shape (n_samples, n_features, time_length)
        training data
    train_y: {1d numpy array vector}, shape (n_samples,)
        training class labels
    test_x: {3d numpy array matrix}, shape (n_samples, n_features, time_length)
        testing data
    test_y: {1d numpy array vector}, shape (n_samples,)
        testing class labels
    n_selected_features: {int}
        number of features to select
    keep_k: {int}
        size of the candidate pool built after the first round
    data_key, function_key, fold_key: {str}
        used to build the CNN saver file prefix
    method: {str}
        "cnn" or "rf"
    cnn_setting_file: {str}
        CNN parameter file, only read when method is "cnn"
    logger: {logger or None}
        when None, a logger is created through setup_logger("")

    Output
    ------
    F: {numpy array}, shape (n_selected_features, )
        index of selected features in selection order (shorter when the
        candidates run out first)

    Raises
    ------
    ValueError
        when method is neither "cnn" nor "rf"
    """
    # Fail fast: an unknown method used to surface later as an
    # UnboundLocalError inside the evaluation loop.
    if method not in ("cnn", "rf"):
        raise ValueError("Unsupported method: " + str(method))

    if logger is None:
        log_file = ""
        logger = setup_logger(log_file)

    _, n_features, time_length = train_x.shape

    eval_method = "f1"
    if method == "cnn":
        min_class = min(train_y)
        max_class = max(train_y)
        num_classes = max_class - min_class + 1
        data_stru = data_structure(num_classes, min_class, n_features, time_length)
        cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
        logger.info('cnn setting:\n ' + cnn_setting.to_string())
        saver_file_profix = "../../object/" + data_key + "/" +function_key + "/cnn_model_folder/"
        saver_file_profix = init_folder(saver_file_profix)
        saver_file_profix = saver_file_profix + fold_key
        eval_method = cnn_setting.eval_method
    else:
        model = RandomForestClassifier(n_estimators=20, random_state=0)

    # selected feature set, initialized to be empty
    F = []
    # first round: every feature is a candidate
    F_available = list(range(n_features))
    # single-feature scores of the first round; -1 marks "unset or used"
    F_eval_score = np.zeros(n_features) - 1
    count = 0
    while count < n_selected_features:
        max_eval_value = -1
        idx = None
        f_score = []
        logger.info("For iter " + str(count))
        logger.info("available list for this iter: " + str(F_available))
        for i in F_available:
            if i in F:
                continue
            # features selected so far plus candidate i
            candidate = F + [i]
            train_x_tmp = train_x[:, candidate, :]
            test_x_tmp = test_x[:, candidate, :]
            F_key = str(candidate)[1:-1]

            if method == "cnn":
                eval_value, train_run_time, test_run_time = model_evaluation_cnn(train_x_tmp, train_y, test_x_tmp, test_y, data_stru, cnn_setting, saver_file_profix + "_F" + F_key, logger)[:3]
            else:
                eval_value, train_run_time, test_run_time = model_evaluation_rf(train_x_tmp, train_y, test_x_tmp, test_y, model, logger)
            f_eval_value = eval_value
            if count == 0:
                F_eval_score[i] = eval_value
            logger.info("Features With: " + str(candidate))
            logger.info("Adding Feature " + str(i) + ": ")
            logger.info(method + " " + eval_method + " Value For Feature " + str(i) + ": " + str(f_eval_value))
            logger.info(method +" Training time (sec): " + str(train_run_time))
            logger.info(method + " Testing time (sec): " + str(test_run_time))
            f_score.append(f_eval_value)
            # record the feature which results in the largest accuracy
            if eval_value > max_eval_value:
                max_eval_value = eval_value
                idx = i

        if idx is None:
            # Nothing was evaluated: the candidate pool is exhausted.
            logger.info("No candidate attribute left, stop early")
            break

        # the winner must not be offered as a candidate again
        F_eval_score[idx] = -1
        if count == 0:
            # build the fixed-width pool from the best remaining features
            F_available = []
            for _ in range(keep_k):
                add_id = _take_best_unused_feature(F_eval_score)
                if add_id is None:
                    break
                F_available.append(add_id)
        else:
            # replace the winner by the next best feature of the first round
            F_available.remove(idx)
            add_id = _take_best_unused_feature(F_eval_score)
            if add_id is not None:
                F_available.append(add_id)

        logger.info("Eval score vector: " + str(f_score))
        logger.info("The added attribute is: " + str(idx))
        logger.info("larggest eval value is: " + str(max_eval_value))
        # add the feature which results in the largest accuracy
        F.append(idx)
        count += 1
    return np.array(F)


def _take_best_unused_feature(scores):
    """Return the index of the highest score above -1 and mark it as used (-1); None when nothing is left."""
    if len(scores) == 0:
        return None
    # plain int keeps numpy scalar types out of the feature lists and of the
    # saver file key built from them
    best = int(np.argmax(scores))
    if scores[best] <= -1:
        return None
    scores[best] = -1
    return best
Exemple #5
0
def pv_classification_cnn(parameter_file,
                          file_keyword,
                          function_keyword="pv_classification"):
    """Run the feature-projected classifier on each matching training file.

    Settings come from ``read_feature_classification``.  For every entry of
    the data folder whose name contains ``file_keyword`` the function opens
    a dedicated log file, loads the first object file whose name contains
    the training file's key, reads the train/test pair (the test file name
    is the train file name with ``'train'`` replaced by ``'test'``), runs
    ``run_feature_projected_ijcnn_fcn`` when the configured method is
    ``'fcn'`` and ``run_feature_projected_cnn`` otherwise, and stores the six
    returned values with ``save_obj``.

    Parameters
    ----------
    parameter_file:
        Forwarded to ``read_feature_classification``.
    file_keyword:
        Substring a data file name must contain to be processed.
    function_keyword:
        Forwarded to ``read_feature_classification`` and embedded in the
        log file name.

    Raises
    ------
    Exception
        When no entry of the object folder contains the training file's key.
    """
    settings = read_feature_classification(parameter_file, function_keyword)
    (data_keyword, data_folder, attr_num, attr_len, num_classes, start_class,
     class_column, class_id, obj_folder, top_k, method, log_folder,
     cnn_obj_folder, cnn_temp_folder, cnn_setting_file) = settings

    log_folder = init_folder(log_folder)
    cnn_obj_folder = init_folder(cnn_obj_folder)
    cnn_temp_folder = init_folder(cnn_temp_folder)

    # data_stru receives the class column from the parameter file; the file
    # reader further down is always given column 0.
    data_stru = return_data_stru(
        num_classes, start_class, attr_num, attr_len, class_column)
    class_column = 0
    header = True
    delimiter = ' '

    data_files = list_files(data_folder)
    obj_files = list_files(obj_folder)
    # Never incremented in this function: the shape logging therefore runs
    # for every file and the model key always ends in "_count0".
    file_count = 0

    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.save_obj_folder = cnn_obj_folder
    cnn_setting.temp_obj_folder = cnn_temp_folder
    cnn_setting.eval_method = 'f1'
    init_folder(cnn_obj_folder)
    init_folder(cnn_temp_folder)

    save_obj_folder = init_folder(obj_folder[:-1] + "_" + method + "_out")

    # Only matching files get a logger index, starting at 0.
    matching_files = (name for name in data_files if file_keyword in name)
    for loop_count, train_file in enumerate(matching_files):
        file_key = train_file.replace('.txt', '')
        run_tag = '_class' + str(class_id) + '_top' + str(top_k) + '_' + method
        log_file = (log_folder + data_keyword + '_' + file_key + '_' +
                    function_keyword + run_tag + '.log')
        print("log file: " + log_file)

        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        log = logger.info
        log('\nlog file: ' + log_file)
        log(train_file)
        log('cnn setting:\n ' + cnn_setting.to_string())
        log('method: ' + method)
        log('============')

        # First object file whose name contains the key; '' means none.
        found_obj_file = next(
            (name for name in obj_files if file_key in name), '')
        if found_obj_file == '':
            raise Exception('No obj file found')
        found_obj_file = obj_folder + found_obj_file

        feature_dict = np.array(load_obj(found_obj_file)[0])
        log("feature array shape: " + str(feature_dict.shape))

        test_file = train_file.replace('train', 'test')
        loaded = train_test_file_reading_with_attrnum(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)
        (train_x_matrix, train_y_vector, test_x_matrix, test_y_vector,
         attr_num) = loaded

        if file_count == 0:
            for label, array in (('train matrix shape: ', train_x_matrix),
                                 ('train label shape: ', train_y_vector),
                                 ('test matrix shape: ', test_x_matrix),
                                 ('test label shape: ', test_y_vector)):
                log(label + str(array.shape))

        train_x_matrix = train_test_transpose(
            train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(
            test_x_matrix, attr_num, attr_len, False)
        data_stru.attr_num = top_k

        # Both runners take the same arguments and return the same six values.
        if method == 'fcn':
            run_projection = run_feature_projected_ijcnn_fcn
        else:
            run_projection = run_feature_projected_cnn
        model_key = file_key + '_' + method + '_count' + str(file_count)
        fold_result = run_projection(
            train_x_matrix, train_y_vector, test_x_matrix, test_y_vector,
            data_stru, cnn_setting, feature_dict, top_k, model_key, class_id,
            logger)
        (fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time,
         fold_test_time, fold_predict_matrix) = fold_result

        log("Fold F1: " + str(fold_f1_value))
        log(method + ' fold training time (sec):' + str(fold_train_time))
        log(method + ' fold testing time (sec):' + str(fold_test_time))
        log(method + ' fold accuracy: ' + str(fold_accuracy))

        result_file = (save_obj_folder + file_key + "_" + method +
                       "_project_" + method + "_result.ckpt")
        log("save obj to " + result_file)
        save_obj([
            fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time,
            fold_test_time, fold_predict_matrix
        ], result_file)
Exemple #6
0
def pv_cnn_generation_main(parameter_file,
                           file_keyword,
                           function_keyword="pv_cnn_generation"):
    """Run the projected-feature network without a feature dictionary.

    Settings come from ``read_pv_cnn_generation`` and are echoed to stdout.
    For every entry of the data folder whose name contains ``file_keyword``
    the function opens a dedicated log file, reads the train/test pair (the
    test file name is the train file name with ``'train'`` replaced by
    ``'test'``), and calls ``run_feature_projected_ijcnn_fcn`` when the
    configured method is ``'fcn'`` or ``run_feature_projected_cnn``
    otherwise, with ``feature_dict=None`` and ``top_k=-1``.  The six returned
    values are stored with ``save_obj``.

    Parameters
    ----------
    parameter_file:
        Forwarded to ``read_pv_cnn_generation``.
    file_keyword:
        Substring a data file name must contain to be processed.
    function_keyword:
        Forwarded to ``read_pv_cnn_generation`` and embedded in the log
        file name.
    """
    settings = tuple(read_pv_cnn_generation(parameter_file, function_keyword))
    (data_keyword, data_folder, attr_num, attr_len, num_classes, start_class,
     class_column, class_id, obj_folder, method, log_folder, out_obj_folder,
     out_model_folder, cnn_setting_file) = settings

    # One space-separated line holding every setting, as read.
    print(" ".join(["%s"] * len(settings)) % settings)

    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)

    # data_stru receives the class column from the parameter file; the file
    # reader further down is always given column 0.
    data_stru = return_data_stru(
        num_classes, start_class, attr_num, attr_len, class_column)
    class_column = 0
    header = True
    delimiter = ' '

    data_files = list_files(data_folder)
    obj_list = list_files(obj_folder)  # listing is not consulted below
    # Never incremented in this function: the shape logging therefore runs
    # for every file and the model name always contains "_count0".
    file_count = 0

    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.out_obj_folder = out_obj_folder
    cnn_setting.out_model_folder = out_model_folder
    cnn_setting.feature_method = 'save'
    cnn_setting.eval_method = 'f1'
    init_folder(out_obj_folder)
    init_folder(out_model_folder)

    result_obj_folder = init_folder(obj_folder + method + "_result_folder")

    # Only matching files get a logger index, starting at 0.
    matching_files = (name for name in data_files if file_keyword in name)
    for loop_count, train_file in enumerate(matching_files):
        file_key = train_file.replace('.txt', '')
        run_tag = '_class' + str(class_id) + '_' + method
        log_file = (log_folder + data_keyword + '_' + file_key + '_' +
                    function_keyword + run_tag + '.log')
        print("log file: " + log_file)

        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        log = logger.info
        log('\nlog file: ' + log_file)
        log(train_file)
        log('method: ' + method)
        log('============')

        test_file = train_file.replace('train', 'test')
        loaded = train_test_file_reading_with_attrnum(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)
        (train_x_matrix, train_y_vector, test_x_matrix, test_y_vector,
         attr_num) = loaded

        if file_count == 0:
            for label, array in (('train matrix shape: ', train_x_matrix),
                                 ('train label shape: ', train_y_vector),
                                 ('test matrix shape: ', test_x_matrix),
                                 ('test label shape: ', test_y_vector)):
                log(label + str(array.shape))

        train_x_matrix = train_test_transpose(
            train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(
            test_x_matrix, attr_num, attr_len, False)

        # Reuse the projected-feature runners with no feature dictionary.
        feature_dict = None
        top_k = -1
        model_save_file = file_key + '_count' + str(file_count) + '_' + method

        # Both runners take the same arguments and return the same six values.
        if method == 'fcn':
            run_projection = run_feature_projected_ijcnn_fcn
        else:
            run_projection = run_feature_projected_cnn
        fold_result = run_projection(
            train_x_matrix, train_y_vector, test_x_matrix, test_y_vector,
            data_stru, cnn_setting, feature_dict, top_k, model_save_file,
            class_id, logger)
        (fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time,
         fold_test_time, fold_predict_matrix) = fold_result

        log("Fold F1: " + str(fold_f1_value))
        log(method + ' fold training time (sec):' + str(fold_train_time))
        log(method + ' fold testing time (sec):' + str(fold_test_time))
        log(method + ' fold accuracy: ' + str(fold_accuracy))

        result_file = (result_obj_folder + file_key + "_all_feature_" +
                       method + "_result.ckpt")
        log("save obj to " + result_file)
        save_obj([
            fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time,
            fold_test_time, fold_predict_matrix
        ], result_file)
Exemple #7
0
def cnn_classification_main(parameter_file,
                            file_keyword,
                            function_keyword="cnn_classification"):
    """Train and evaluate ``run_cnn`` on each matching training file.

    Settings come from ``read_all_feature_classification`` and are echoed to
    stdout.  For every entry of the data folder whose name contains
    ``file_keyword`` the function reads the train/test pair (the test file
    name is the train file name with ``'train'`` replaced by ``'test'``),
    derives the class range from the training labels, opens a dedicated log
    file, converts both label vectors with ``y_vector_to_matrix`` and calls
    ``run_cnn``.  The evaluation value and run times are logged; nothing is
    returned.

    Parameters
    ----------
    parameter_file:
        Forwarded to ``read_all_feature_classification``.
    file_keyword:
        Substring a data file name must contain to be processed.
    function_keyword:
        Forwarded to ``read_all_feature_classification`` and embedded in the
        log file name.
    """
    settings = tuple(
        read_all_feature_classification(parameter_file, function_keyword))
    (data_keyword, data_folder, attr_num, attr_len, num_classes, start_class,
     class_column, class_id, obj_folder, method, log_folder, out_obj_folder,
     out_model_folder, cnn_setting_file) = settings

    # One space-separated line holding every setting, as read.
    print(" ".join(["%s"] * len(settings)) % settings)

    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)

    # data_stru receives the class column and class count from the parameter
    # file; the file reader further down is always given column 0.
    # NOTE(review): num_classes is recomputed per file inside the loop but
    # data_stru is not updated with it - confirm this is intended.
    data_stru = return_data_stru(
        num_classes, start_class, attr_num, attr_len, class_column)
    class_column = 0
    header = True
    delimiter = ' '
    group_all = False

    data_files = list_files(data_folder)
    obj_list = list_files(obj_folder)  # listing is not consulted below
    # Never incremented in this function, so the shape logging runs for
    # every file.
    file_count = 0

    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.out_obj_folder = out_obj_folder
    cnn_setting.out_model_folder = out_model_folder
    cnn_setting.feature_method = 'none'
    cnn_key = return_cnn_keyword(cnn_setting)  # result is not consulted below
    init_folder(out_obj_folder)
    init_folder(out_model_folder)

    result_obj_folder = init_folder(obj_folder + method + "_result_folder")

    saver_file_profix = ""
    # Only matching files get a logger index, starting at 0.
    matching_files = (name for name in data_files if file_keyword in name)
    for loop_count, train_file in enumerate(matching_files):
        file_key = train_file.replace('.txt', '')
        saver_file_profix = file_key
        test_file = train_file.replace('train', 'test')

        loaded = train_test_file_reading_with_attrnum(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)
        (train_x_matrix, train_y_vector, test_x_matrix, test_y_vector,
         attr_num) = loaded

        # Class range is taken from the training labels of this file.
        min_class = min(train_y_vector)
        max_class = max(train_y_vector)
        num_classes = max_class - min_class + 1

        # Tag used only in the log file name.
        if cnn_setting.eval_method == "accuracy":
            cnn_eval_key = "acc"
        else:
            cnn_eval_key = "acc_batch" if num_classes > 2 else "f1"

        log_suffix = ('_class' + str(min_class) + "_" + str(max_class) +
                      "_act" + str(cnn_setting.activation_fun) + "_" +
                      cnn_eval_key + '.log')
        log_file = (log_folder + data_keyword + '_' + file_key + '_' +
                    function_keyword + log_suffix)
        print("log file: " + log_file)

        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        log = logger.info
        log('\nlog file: ' + log_file)
        log(train_file)
        log('cnn setting:\n ' + cnn_setting.to_string())
        log('method: ' + method)
        log('============')

        train_x_matrix = train_test_transpose(
            train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(
            test_x_matrix, attr_num, attr_len, False)

        if file_count == 0:
            for label, array in (('train matrix shape: ', train_x_matrix),
                                 ('train label shape: ', train_y_vector),
                                 ('test matrix shape: ', test_x_matrix),
                                 ('test label shape: ', test_y_vector)):
                log(label + str(array.shape))

        # Small sample of the transposed inputs, written to the log.
        log(train_x_matrix[0, 0:3, 0:2, 0])
        log(test_x_matrix[0, 0:3, 0:2, 0])

        train_y_matrix = y_vector_to_matrix(train_y_vector, num_classes)
        test_y_matrix = y_vector_to_matrix(test_y_vector, num_classes)

        # The next three values are not passed to run_cnn.
        feature_dict = None
        top_k = -1
        model_save_file = file_key + '_count' + str(file_count) + '_' + method

        run_result = run_cnn(
            train_x_matrix, train_y_matrix, test_x_matrix, test_y_matrix,
            data_stru, cnn_setting, group_all, saver_file_profix, logger)
        (cnn_eval_value, train_run_time, test_run_time, cnn_predict_proba,
         saver_file, feature_list_obj_file, relu_base_array) = run_result

        log("Fold eval value: " + str(cnn_eval_value))
        log(method + ' fold training time (sec):' + str(train_run_time))
        log(method + ' fold testing time (sec):' + str(test_run_time))
        log("save obj to " + saver_file)
Exemple #8
0
def multi_projected_cnn_classification_main(parameter_file, file_keyword, function_keyword="multi_proj_classification"):
    """Evaluate saved projected models on each matching training file.

    Settings come from ``read_feature_classification``.  For every entry of
    the data folder whose name contains ``file_keyword`` the function opens
    a dedicated log file, loads the first object file whose name contains
    the training file's key, reads the train/test pair (the test file name
    is the train file name with ``'train'`` replaced by ``'test'``), and
    calls ``run_load_predict_cnn`` with the test data and the model folder
    derived from the settings.  Accuracy, the F1 list and the timings are
    logged; nothing is returned.

    Parameters
    ----------
    parameter_file:
        Forwarded to ``read_feature_classification``.
    file_keyword:
        Substring a data file name must contain to be processed.
    function_keyword:
        Forwarded to ``read_feature_classification`` and embedded in the
        log file name and the output folder path.

    Raises
    ------
    Exception
        When no entry of the object folder contains the training file's key.
    """
    settings = read_feature_classification(parameter_file, function_keyword)
    (data_keyword, data_folder, attr_num, attr_len, num_classes, start_class,
     class_column, class_id, obj_folder, top_k, method, log_folder,
     cnn_obj_folder, cnn_temp_folder, cnn_setting_file) = settings

    # Second-to-last component of the '/'-separated object folder path.
    obj_keyword = obj_folder.split('/')[-2]
    model_saved_folder = ("../../object/" + data_keyword +
                          "/projected_classification/" + obj_keyword +
                          "_top" + str(top_k) + "_cnn_model_folder/")
    for shown in (obj_keyword, cnn_obj_folder, model_saved_folder):
        print(shown)

    top_keyword = "_top" + str(top_k) + "."  # not consulted below
    group_all = False

    # Only the log folder goes through init_folder; cnn_obj_folder and
    # cnn_temp_folder are used exactly as read.
    log_folder = init_folder(log_folder)

    # data_stru receives the class column from the parameter file; the file
    # reader further down is always given column 0.
    data_stru = return_data_stru(
        num_classes, start_class, attr_num, attr_len, class_column)
    class_column = 0
    header = True
    delimiter = ' '

    data_files = list_files(data_folder)
    obj_files = list_files(obj_folder)
    # Never incremented in this function, so the shape logging runs for
    # every file.
    file_count = 0

    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.save_obj_folder = cnn_obj_folder
    cnn_setting.temp_obj_folder = cnn_temp_folder
    cnn_setting.eval_method = 'f1'

    save_obj_folder = init_folder(
        "../../object/" + data_keyword + "/" + function_keyword + "/" +
        obj_keyword + "/")

    # Only matching files get a logger index, starting at 0.
    matching_files = (name for name in data_files if file_keyword in name)
    for loop_count, train_file in enumerate(matching_files):
        file_key = train_file.replace('.txt', '')
        run_tag = '_class' + str(class_id) + '_top' + str(top_k) + '_' + method
        log_file = (log_folder + data_keyword + '_' + file_key + '_' +
                    function_keyword + run_tag + '.log')
        print("log file: " + log_file)

        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        log = logger.info
        log('\nlog file: ' + log_file)
        log(train_file)
        log('cnn setting:\n ' + cnn_setting.to_string())
        log('method: ' + method)
        log('============')

        # First object file whose name contains the key; '' means none.
        found_obj_file = next(
            (name for name in obj_files if file_key in name), '')
        if found_obj_file == '':
            raise Exception('No obj file found')
        found_obj_file = obj_folder + found_obj_file

        feature_dict = np.array(load_obj(found_obj_file)[0])
        log("feature array shape: " + str(feature_dict.shape))

        test_file = train_file.replace('train', 'test')
        loaded = train_test_file_reading_with_attrnum(
            data_folder + train_file, data_folder + test_file, class_column,
            delimiter, header)
        (train_x_matrix, train_y_vector, test_x_matrix, test_y_vector,
         attr_num) = loaded

        train_x_matrix = train_test_transpose(
            train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(
            test_x_matrix, attr_num, attr_len, False)

        if file_count == 0:
            for label, array in (('train matrix shape: ', train_x_matrix),
                                 ('train label shape: ', train_y_vector),
                                 ('test matrix shape: ', test_x_matrix),
                                 ('test label shape: ', test_y_vector)):
                log(label + str(array.shape))
            log("topk: " + str(top_k))

        data_stru.attr_num = top_k
        predict_result = run_load_predict_cnn(
            file_key, model_saved_folder, feature_dict, top_k, test_x_matrix,
            test_y_vector, data_stru, cnn_setting, group_all, save_obj_folder,
            logger)
        (fold_accuracy, fold_f1_list, fold_load_time,
         fold_test_time) = predict_result

        log("Fold ACC: " + str(fold_accuracy))
        log("Fold F1 list: " + str(fold_f1_list))
        # NOTE(review): the label says "training time" but the value logged
        # is fold_load_time - confirm whether the wording is intended.
        log(method + ' fold training time (sec):' + str(fold_load_time))
        log(method + ' fold testing time (sec):' + str(fold_test_time))
Exemple #9
0
    print out_conv.get_shape()
    out_conv = tf.reshape(out_conv, [-1, feature_num])
    print std_value
    print feature_num
    predict_y_prob = conf_out_layer(out_conv, feature_num, num_classes,
                                    std_value)
    #print "predict_y_prob"
    print predict_y_prob.get_shape()
    return predict_y_prob, keep_prob_placeholder, keeped_feature_list, saver_file


# End of CNN method

if __name__ == '__main__':
    cnn_setting_file = "../../parameters/cnn_model_parameter.txt"
    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)

    train_row = 20
    test_row = 10
    num_classes = 3
    attr_num = 45
    attr_len = 125
    data_stru = return_data_stru(num_classes, 0, attr_num, attr_len, 0)
    train_x_matrix = np.random.rand(train_row, attr_len, attr_num, 1)
    test_x_matrix = np.random.rand(test_row, attr_len, attr_num, 1)
    train_y_vector = np.array(
        [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 0, 0, 0, 2])
    test_y_vector = np.array([0, 0, 0, 1, 1, 1, 0, 0, 2, 2])
    train_y_matrix = y_vector_to_matrix(train_y_vector, num_classes)
    test_y_matrix = y_vector_to_matrix(test_y_vector, num_classes)
    print train_x_matrix.shape