def data_plot(data_file, class_column=0, delimiter=' '):
    """Plot one sample per class label and save each figure as a PDF.

    The file is loaded with ``file_reading`` and split into features and
    labels with ``x_y_spliting``. Every feature row is reshaped to
    ``(attr_num, attr_len)``. For each integer label between the smallest
    and the largest label (both inclusive), the first row carrying that
    label is drawn with one line per attribute, and the figure is written
    to ``asl_class_<label>.pdf`` in the current working directory.

    Trailing zeros of each attribute series are not plotted (presumably
    they are padding -- confirm against the data format).

    Args:
        data_file: Path of the data file, forwarded to ``file_reading``.
        class_column: Label column, forwarded to ``x_y_spliting``.
        delimiter: Field delimiter, forwarded to ``file_reading``.
    """
    x_matrix, attr_num = file_reading(data_file, delimiter, True)
    x_matrix, y_vector = x_y_spliting(x_matrix, class_column)
    y_min = int(min(y_vector))
    y_max = int(max(y_vector))
    x_row, x_col = x_matrix.shape
    # Floor division keeps attr_len an integer, which reshape requires.
    attr_len = x_col // attr_num
    x_matrix = x_matrix.reshape(x_row, attr_num, attr_len)
    # y_max + 1 so that the highest class label gets a plot as well.
    for label in range(y_min, y_max + 1):
        label_index = np.where(y_vector == label)[0]
        if len(label_index) == 0:
            # No sample carries this label, so there is nothing to plot.
            continue
        label_row = x_matrix[label_index[0], :, :]

        out_pdf = "asl_class_" + str(label) + ".pdf"
        fig = plt.figure()
        for attr in range(attr_num):
            plot_series = label_row[attr, :]
            # Cut the series right after its last non-zero value.
            nonzero_index = np.nonzero(plot_series)[0]
            stop_i = nonzero_index[-1] + 1 if len(nonzero_index) else 0
            plt.plot(plot_series[0:stop_i])
        fig.savefig(out_pdf, dpi=fig.dpi)
        # Release the figure; otherwise one figure per class stays in memory.
        plt.close(fig)
def data_checking(data_file, class_column=0, delimiter=' '):
    """Return a short text summary of a data file.

    The summary has three lines: the shape of the feature matrix, the
    smallest and largest class label, and a dict mapping each distinct
    label to the number of rows carrying it.

    Args:
        data_file: Path of the data file, forwarded to ``file_reading``.
        class_column: Label column, forwarded to ``x_y_spliting``.
        delimiter: Field delimiter, forwarded to ``file_reading``.

    Returns:
        The summary as a single newline-separated string.
    """
    raw_matrix, _ = file_reading(data_file, delimiter, True)
    x_matrix, y_vector = x_y_spliting(raw_matrix, class_column)

    lowest_label = min(y_vector)
    highest_label = max(y_vector)
    labels, occurrences = np.unique(y_vector, return_counts=True)

    summary_lines = [
        'x_matrix shape: ' + str(x_matrix.shape),
        "class labels from " + str(lowest_label) + " to " + str(highest_label),
        str(dict(zip(labels, occurrences))),
    ]
    return '\n'.join(summary_lines)
def norm_checking(data_file):
    """Check that every attribute series in a data file is z-normalized.

    The file is loaded with ``file_reading``, the label in column 0 is
    split off with ``x_y_spliting``, and each feature row is reshaped to
    ``(attr_num, attr_len)``.

    Args:
        data_file: Path of the data file, forwarded to ``file_reading``.

    Returns:
        False as soon as one (row, attribute) series has a mean outside
        [-0.0001, 0.0001] or a standard deviation outside
        [0.99999, 1.00001]; True otherwise.
    """
    data_matrix, attr_num = file_reading(data_file)
    data_x_matrix, data_y_vector = x_y_spliting(data_matrix, 0)
    data_row, data_col = data_x_matrix.shape
    # Floor division keeps attr_len an integer, which reshape requires.
    attr_len = data_col // attr_num
    data_x_matrix = data_x_matrix.reshape(data_row, attr_num, attr_len)
    for row in range(data_row):
        for attr in range(attr_num):
            series = data_x_matrix[row, attr, :]
            mean = np.mean(series)
            std = np.std(series)
            if mean > 0.0001 or mean < -0.0001:
                return False
            if std > 1.00001 or std < 0.99999:
                return False
    return True
# Example no. 4
# 0
def run_cnn_projected_feature_analysis(feature_folder,
                                       class_id,
                                       data_folder,
                                       data_file_keyword,
                                       method="rf_lda",
                                       log_folder='./'):
    """Run a per-class attribute analysis on stored feature objects.

    For every file in ``data_folder`` whose name contains
    ``data_file_keyword``, the labels are read from column 0 and, for each
    class in range, every file in ``feature_folder`` whose name contains
    both the data key and ``_class<label>_`` is loaded. Its first element
    is analysed one-vs-rest with the selected method, and the resulting
    attribute lists are collected and saved with ``save_obj``.

    Args:
        feature_folder: Folder holding the feature object files. The last
            character is stripped to build the output folder name, so a
            trailing path separator is expected.
        class_id: Class to analyse; a negative value means every class
            from the smallest to the largest label in the data file.
        data_folder: Folder holding the data files (used as a string
            prefix, so it should end with a path separator).
        data_file_keyword: Substring a data file name must contain.
        method: One of "rf_lda" / "rf_lda_sum", "rf", "lda" or "cpca".
        log_folder: Folder prefix for the log files.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    data_file_list = list_files(data_folder)
    feature_file_list = list_files(feature_folder)
    out_obj_folder = feature_folder[:-1] + "_" + method
    # NOTE(review): init_folder's return value is used as a path prefix
    # below -- presumably it ends with a separator; confirm.
    out_obj_folder = init_folder(out_obj_folder)
    class_column = 0

    for train_file in data_file_list:
        if data_file_keyword not in train_file:
            continue
        data_key = train_file.replace('.txt', '')
        data_matrix, attr_num = file_reading(data_folder + train_file)
        train_x_matrix, train_y_vector = x_y_spliting(data_matrix,
                                                      class_column)
        if class_id < 0:
            min_class = min(train_y_vector)
            max_class = max(train_y_vector) + 1
        else:
            min_class = class_id
            max_class = min_class + 1
        file_stem = data_key + "_" + method + "_min" + str(
            min_class) + "_max" + str(max_class)
        log_file = file_stem + ".log"
        logger = setup_logger(log_folder + log_file)
        logger.info('data file: ' + train_file)
        out_obj_file = file_stem + ".obj"
        out_obj_matrix = []
        for label in range(min_class, max_class):
            logger.info("class: " + str(label))
            feature_key = "_class" + str(label) + "_"
            for feature_file in feature_file_list:
                if data_key not in feature_file or feature_key not in feature_file:
                    continue
                logger.info("feature file: " + feature_file)
                feature_obj = load_obj(feature_folder + feature_file)
                train_feature = obj_processing(feature_obj[0])
                logger.info("train feature shape: " + str(train_feature.shape))
                # One-vs-rest target: 1 for the current class, 0 otherwise.
                class_train_y = np.where(train_y_vector == label, 1, 0)
                logger.info("feature method: " + str(method))
                # "rf_lda" is this function's default and the name used by
                # run_pure_pv_evaluation; "rf_lda_sum" is kept as an alias.
                if method in ("rf_lda", "rf_lda_sum"):
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_lda_analysis(
                        train_feature, class_train_y, logger)
                elif method == "rf":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_analysis(
                        train_feature, class_train_y, logger)
                elif method == "lda":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_lda_analysis(
                        train_feature, class_train_y, logger)
                elif method == "cpca":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_cpca_analysis(
                        train_feature, class_train_y, logger)
                else:
                    raise ValueError("unsupported method: " + str(method))
                if method == "cpca":
                    # The cpca result is stored as-is, without the mapping
                    # step applied to the other methods.
                    class_attr_list = class_attr_imp_matrix
                else:
                    logger.info("class attr imp matrix shape: " +
                                str(class_attr_imp_matrix.shape))
                    class_attr_list = map_attr_imp_analysis(
                        class_attr_imp_matrix, logger)
                logger.info(class_attr_list)
                out_obj_matrix.append(class_attr_list)
        out_obj_matrix = np.array(out_obj_matrix)
        logger.info("out obj to: " + out_obj_folder + out_obj_file)
        logger.info(out_obj_matrix.shape)
        save_obj([out_obj_matrix], out_obj_folder + out_obj_file)
# Example no. 5
# 0
def run_pure_pv_evaluation(
        file_keyword,
        parameter_file='../../parameters/pv_baseline_evaluation.txt',
        function_keyword="pure_pv_evaluation"):
    """Run a per-class attribute analysis directly on the raw data files.

    Settings are read with ``read_pure_feature_generation``. For every file
    in the configured data folder whose name contains ``file_keyword``,
    the feature rows are reshaped to ``(attr_num, attr_len)`` and each
    class in range is analysed one-vs-rest with the configured method.
    The mapped attribute lists of all classes are saved with ``save_obj``.

    Args:
        file_keyword: Substring a data file name must contain.
        parameter_file: Path of the parameter file.
        function_keyword: Key forwarded to ``read_pure_feature_generation``.

    Raises:
        ValueError: If the configured method is not "rf_lda", "rf" or "lda".
    """
    (data_keyword, data_folder, attr_num, attr_len, num_classes, start_class,
     class_column, class_id, method, log_folder,
     out_obj_folder) = read_pure_feature_generation(parameter_file,
                                                    function_keyword)

    # A single pre-joined string prints the same on Python 2 and Python 3.
    print(" ".join(
        str(value)
        for value in (data_keyword, data_folder, attr_num, attr_len,
                      num_classes, start_class, class_column, class_id,
                      method, log_folder, out_obj_folder)))

    file_list = list_files(data_folder)

    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        train_key = train_file.replace('.txt', '')

        # attr_num from the parameter file is replaced by the value that
        # file_reading reports for this file; attr_len stays as configured.
        data_matrix, attr_num = file_reading(data_folder + train_file)
        train_x_matrix, train_y_vector = x_y_spliting(data_matrix,
                                                      class_column)
        train_row, train_col = train_x_matrix.shape
        train_x_matrix = train_x_matrix.reshape(train_row, attr_num, attr_len)
        if class_id < 0:
            min_class = min(train_y_vector)
            max_class = max(train_y_vector) + 1
        else:
            min_class = class_id
            max_class = min_class + 1
        file_stem = train_key + "_" + method + "_min" + str(
            min_class) + "_max" + str(max_class) + "_pure_projected"
        log_file = file_stem + ".log"

        logger = setup_logger(log_folder + log_file)
        print("log file: " + log_folder + log_file)
        logger.info(train_file)
        out_obj_file = file_stem + ".obj"
        out_obj_matrix = []
        logger.info("min class: " + str(min_class))
        logger.info("max class: " + str(max_class))
        for label in range(min_class, max_class):
            # One-vs-rest target: 1 for the current class, 0 otherwise.
            class_train_y = np.where(train_y_vector == label, 1, 0)
            logger.info("label: " + str(label))
            if method == 'rf_lda':
                class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_lda_analysis(
                    train_x_matrix, class_train_y, logger)
            elif method == "rf":
                class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_analysis(
                    train_x_matrix, class_train_y, logger)
            elif method == "lda":
                class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_lda_analysis(
                    train_x_matrix, class_train_y, logger)
            else:
                raise ValueError("unsupported method: " + str(method))
            logger.info("class attr imp matrix shape: " +
                        str(class_attr_imp_matrix.shape))
            class_attr_list = map_attr_imp_analysis(class_attr_imp_matrix,
                                                    logger)
            logger.info(class_attr_list)
            logger.info(class_attr_list.shape)
            out_obj_matrix.append(class_attr_list)

        out_obj_matrix = np.array(out_obj_matrix)
        logger.info("out obj to: " + out_obj_folder + out_obj_file)
        logger.info(out_obj_matrix.shape)
        save_obj([out_obj_matrix], out_obj_folder + out_obj_file)