def var_corr_significant(path):
    """Scan every correlation-matrix CSV under *path* and log, per column,
    the row labels whose absolute correlation exceeds a fixed threshold.

    Parameters
    ----------
    path : str
        Directory (with trailing slash) holding correlation CSV files; the
        second-to-last path component names the log file.

    Side effects: writes result/corr_log_<dir>_<threshold>.txt.
    """
    filelist = np.sort(preprocess.get_filelist(path))
    corr_criterion = 0.5
    log_name = "result/corr_log_{0}_{1}.txt".format(
        path.split('/')[-2], corr_criterion)
    # Context manager: the original opened the log and never closed it.
    with open(log_name, "w") as log_file:
        print(
            "********************** file correlation large than {} ***************"
            .format(corr_criterion),
            file=log_file)
        for file in filelist:
            print(
                "\n\n*************************** file: {} **********************".
                format(file),
                file=log_file)
            data = pd.read_csv(path + file, index_col=0)
            names = data.columns
            # First column skipped — assumed index-like; TODO confirm.
            for col_name in names[1:]:
                series = data[col_name]
                # Boolean mask of |corr| above threshold. Renamed from
                # `filter`, which shadowed the builtin.
                mask = series.abs() > corr_criterion
                print("--------------- {} ------------------".format(col_name),
                      file=log_file)
                for index, value in zip(names[mask], series[mask]):
                    print("{0}\t\t{1}".format(index, value), file=log_file)
def select_spercific():
    """For every correlation-matrix CSV in a fixed directory, log the
    correlation of each ZX_WD_i_j column against its neighbours: all
    ZX_WD_i_k (same first index) and all ZX_WD_k_j with k != i.

    Side effects: writes result/corr_zw.txt.
    """
    path = "result/corr_interpolation_10s_int/"
    filelist = np.sort(preprocess.get_filelist(path))
    # Context manager: the original opened the log and never closed it.
    with open("result/corr_zw.txt", "w") as file_log:
        for file in filelist:
            print(
                "**************** result of file: {} **************".format(file),
                file=file_log)
            data = pd.read_csv(path + file, index_col=0)
            for i in np.arange(1, 7):
                for j in np.arange(1, 7):
                    name = 'ZX_WD_{0}_{1}'.format(i, j)
                    print("-------- {} --------------".format(name),
                          file=file_log)
                    # Same first index i, all k (includes `name` itself);
                    # comprehensions replace insert(len(...), x) appends.
                    names_corr = [
                        'ZX_WD_{0}_{1}'.format(i, k) for k in np.arange(1, 7)
                    ]
                    # Same second index j on every other first index k != i.
                    names_corr += [
                        'ZX_WD_{0}_{1}'.format(k, j) for k in np.arange(1, 7)
                        if k != i
                    ]
                    for index, value in zip(names_corr,
                                            data.loc[names_corr, name]):
                        print("{0}\t\t{1}".format(index, value), file=file_log)
            print(
                "***********************************************************************\n\n",
                file=file_log)
def cal_correlation(path_in, path_out):
    """Compute the pairwise correlation matrix of every CSV in *path_in*
    and write it to a same-named CSV in *path_out*."""
    for filename in preprocess.get_filelist(path_in):
        print(filename)
        frame = pd.read_csv(path_in + filename, index_col=0)
        frame.corr().to_csv(path_out + filename)
# Esempio n. 4  (scraper artifact: example separator, not Python code)
# 0
def pca_dataset_plot(path):
    """Plot PCA-grouped sensor columns for every CSV file in *path*.

    For each file: scatter-plot the "other" variables in a subplot grid,
    then line-plot each named sensor group (HW1, HW2, six ZW groups).
    Side effects only (matplotlib figures); returns None.
    """
    def pd_plot(frame):
        # Bug fix: the original ignored its parameter and plotted the
        # enclosing `data_plot` through closure capture — correct only by
        # accident. Plot the argument that was passed in.
        ax = frame.plot(figsize=(13, 2))
        ax.legend(loc='best', prop={'size': 6})

    filelist = preprocess.get_filelist(path)
    for file in filelist:
        print("---------------{}----------".format(file))
        data = pd.read_csv(path + file)

        # "Other" variables as a grid of scatter subplots.
        data_plot = data[static_variable.name_pca_other]
        # np.int was removed in NumPy >= 1.24; the builtin int is the
        # documented replacement.
        axs = data_plot.plot(subplots=True,
                             layout=(int(data_plot.shape[1] / 2), 2),
                             figsize=(15, 8),
                             style='o')
        # HW1 / HW2 sensor groups.
        pd_plot(data[static_variable.name_HW1])
        pd_plot(data[static_variable.name_HW2])
        # ZW groups 1-6, one figure each.
        for group in (static_variable.name_pca_ZWNo1_1,
                      static_variable.name_pca_ZWNo1_2,
                      static_variable.name_pca_ZWNo1_3,
                      static_variable.name_pca_ZWNo1_4,
                      static_variable.name_pca_ZWNo1_5,
                      static_variable.name_pca_ZWNo1_6):
            pd_plot(data[group])
# Esempio n. 5  (scraper artifact: example separator, not Python code)
# 0
def cal_rolling_path(path_in, path_out, func, win):
    """Apply cal_rolling_stats to every file in *path_in*, writing the
    rolling statistic (aggregator *func*, window *win*) to a same-named
    file in *path_out*."""
    for filename in preprocess.get_filelist(path_in):
        print("caculate for file {}".format(filename))
        cal_rolling_stats(file_in=path_in + filename,
                          file_out=path_out + filename,
                          func=func,
                          win=win)
# Esempio n. 6  (scraper artifact: example separator, not Python code)
# 0
def pca_transformed_main():
    """PCA-transform grouped sensor columns of every CSV in a fixed input
    directory and write the result (plus untouched 'other' variables) to a
    fixed output directory, logging explained-variance ratios.

    Side effects: writes data/pca1/<file> per input file and data/pca1/log.txt.
    """
    path_in = "data/interpolation_10s_int/"
    path_out = "data/pca1/"
    filelist = preprocess.get_filelist(path_in)

    def _pca_group(data, data_new, names, out_prefix, file_log):
        # Run PCA on data[names], log the variance ratio, and store each
        # transformed component under out_prefix + original column name.
        ratio, value_transformed = pca_transformed(data[names])
        print(ratio, file=file_log)
        for k in range(len(names)):
            data_new[out_prefix + names[k]] = value_transformed[:, k]

    # Context manager: the original opened the log and never closed it.
    with open(path_out + "log.txt", "w") as file_log:
        for file in filelist:
            print("------------- start to process {} ------------".format(file),
                  file=file_log)
            data = pd.read_csv(path_in + file)
            data_new = pd.DataFrame()

            # Non-PCA variables are copied through unchanged.
            for name in name_Others:
                data_new[name] = data[name]

            # ZX_HW1 and ZX_HW2: one PCA per six-column group.
            print("ZX_HW:", file=file_log)
            for i in [1, 2]:
                names = ["ZX_HW{0}_{1}".format(i, j) for j in range(1, 7)]
                _pca_group(data, data_new, names, "", file_log)

            # ZX_WD_No1: grouped by first index (ZX_WD_i_*).
            print("ZX_WD_No1: ", file=file_log)
            for i in range(1, 7):
                names = ["ZX_WD_{0}_{1}".format(i, j) for j in range(1, 7)]
                _pca_group(data, data_new, names, "No1_", file_log)

            # ZX_WD_No2: grouped by second index (ZX_WD_*_i).
            print("ZX_WD_No2: ", file=file_log)
            for i in range(1, 7):
                names = ["ZX_WD_{0}_{1}".format(j, i) for j in range(1, 7)]
                _pca_group(data, data_new, names, "No2_", file_log)

            data_new.to_csv(path_out + file, index=False)
            print("----------------  end ----------------------------",
                  file=file_log)
# Esempio n. 7  (scraper artifact: example separator, not Python code)
# 0
def plot_ZH_HW_box_all(path):
    """Draw a box plot of the 12 ZX_HW sensor columns for every CSV file
    in *path*, one figure per file. Side effects only; returns None."""
    filelist = preprocess.get_filelist(path)
    name = [
        'ZX_HW1_1', 'ZX_HW1_2', 'ZX_HW1_3', 'ZX_HW1_4', 'ZX_HW1_5', 'ZX_HW1_6',
        'ZX_HW2_1', 'ZX_HW2_2', 'ZX_HW2_3', 'ZX_HW2_4', 'ZX_HW2_5', 'ZX_HW2_6'
    ]
    for file in filelist:
        plt.figure(figsize=(13, 2))
        file_path = path + file
        data = pd.read_csv(file_path)
        # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is
        # the supported replacement.
        plt.boxplot(data[name].to_numpy())
        plt.xticks(np.arange(1, 1 + len(name)), name)
        plt.title(file)
# Esempio n. 8  (scraper artifact: example separator, not Python code)
# 0
def plot_ZH_HW_stats_all(path, func):
    """Plot, for every CSV in *path*, the per-column statistic *func*
    computed over the 12 ZX_HW sensor columns, all on one shared figure."""
    plt.figure(figsize=(15, 5))
    name = [
        'ZX_HW1_1', 'ZX_HW1_2', 'ZX_HW1_3', 'ZX_HW1_4', 'ZX_HW1_5', 'ZX_HW1_6',
        'ZX_HW2_1', 'ZX_HW2_2', 'ZX_HW2_3', 'ZX_HW2_4', 'ZX_HW2_5', 'ZX_HW2_6'
    ]
    for file in np.sort(preprocess.get_filelist(path)):
        frame = pd.read_csv(path + file)
        stats = frame[name].apply(func)
        plt.plot(stats.values, 'o-', label=file)
        plt.xticks(np.arange(stats.count()), stats.index)

    plt.legend(loc='best', prop={'size': 8})
    plt.title("std of ZH_HW")
# Esempio n. 9  (scraper artifact: example separator, not Python code)
# 0
# Training directory name (joined onto data_path, which is defined
# earlier in the file, outside this view).
train_data_dir = 'train'
# validation directory path
val_data_dir = 'val'

# Dirs
train_data_clean_dir = os.path.join(data_path, train_data_dir, 'clean')
train_data_noisy_dir = os.path.join(data_path, train_data_dir, 'noisy')
val_data_clean_dir = os.path.join(data_path, val_data_dir, 'clean')
val_data_noisy_dir = os.path.join(data_path, val_data_dir, 'noisy')
# print(train_data_clean_dir, train_data_noisy_dir, val_data_clean_dir, val_data_noisy_dir, sep='\n')

# Preprocessing data
# Each list entry is a (sample, label) pair with label 0 for clean files
# and 1 for noisy files, as set by the lambda's second tuple element.
# NOTE(review): load_and_convert presumably reads a file and converts it
# to a model-ready representation — confirm in the preprocess module.
print('Start preprocessing data (may take a few minutes)...')
train_data_clean = list(
    map(lambda x: (preprocess.load_and_convert(x), 0),
        preprocess.get_filelist(train_data_clean_dir)))
train_data_noisy = list(
    map(lambda x: (preprocess.load_and_convert(x), 1),
        preprocess.get_filelist(train_data_noisy_dir)))
val_data_clean = list(
    map(lambda x: (preprocess.load_and_convert(x), 0),
        preprocess.get_filelist(val_data_clean_dir)))
val_data_noisy = list(
    map(lambda x: (preprocess.load_and_convert(x), 1),
        preprocess.get_filelist(val_data_noisy_dir)))

# Report dataset sizes before building the train/val datasets below.
print(
    f'Train data: clean - {len(train_data_clean)}, noisy - {len(train_data_noisy)}\n'
    f'Val data: clean - {len(val_data_clean)} , noisy - {len(val_data_noisy)}')

# Make train and val datasets