Ejemplo n.º 1
0
def seg_flag_cor_batch(path_in, path_out, names):
    """Apply ``seg_flag_cor`` to every CSV file listed for ``path_in``.

    Each file name returned by ``base.get_files_csv(path_in)`` is read
    from ``path_in``, handed to ``seg_flag_cor`` together with ``names``,
    and the result is written under the same file name in ``path_out``
    without the index column.

    Directory and file name are joined by plain string concatenation, so
    both directories are expected to end with a path separator
    (assuming ``get_files_csv`` yields bare file names -- TODO confirm).
    """
    for csv_name in base.get_files_csv(path_in):
        print("processing file: {}".format(csv_name))
        frame = pd.read_csv(path_in + csv_name)
        corrected = seg_flag_cor(frame, names)
        corrected.to_csv(path_out + csv_name, index=False)
Ejemplo n.º 2
0
def interpolate_bin_mean_batch(path_in, path_out, bin_size=5):
    """Run ``interpolate_bin_mean`` on every CSV file listed for ``path_in``.

    For each file name returned by ``base.get_files_csv(path_in)`` the
    CSV is loaded, passed to ``interpolate_bin_mean`` with ``bin_size``
    (default 5), and the result is saved under the same name in
    ``path_out`` without the index column.

    Paths are built with ``+``, so both directories are expected to end
    with a path separator.
    """
    for csv_name in base.get_files_csv(path_in):
        print("processing file: {}".format(csv_name))
        loaded = pd.read_csv(path_in + csv_name)
        binned = interpolate_bin_mean(loaded, bin_size)
        binned.to_csv(path_out + csv_name, index=False)
Ejemplo n.º 3
0
def del_dup_batch(path_in, path_out):
    """Pass every CSV file listed for ``path_in`` through ``del_dup``.

    Each file name from ``base.get_files_csv(path_in)`` is read from
    ``path_in``; the value returned by ``del_dup`` is written to
    ``path_out`` under the same file name, without the index column.

    Paths are built with ``+``, so both directories are expected to end
    with a path separator.
    """
    for csv_name in base.get_files_csv(path_in):
        print("processing file: {}".format(csv_name))
        deduplicated = del_dup(pd.read_csv(path_in + csv_name))
        deduplicated.to_csv(path_out + csv_name, index=False)
def seg_flag_cor_batch(path_in, path_out, names):
    """Batch wrapper around ``seg_flag_cor``.

    Iterates over the file names given by ``base.get_files_csv(path_in)``,
    reads each CSV from ``path_in``, calls ``seg_flag_cor(data, names)``
    and stores the returned frame in ``path_out`` under the same file
    name (index column omitted).

    ``path_in`` and ``path_out`` are concatenated directly with the file
    name, so they are expected to end with a path separator.
    """
    for entry in base.get_files_csv(path_in):
        print("processing file: {}".format(entry))
        result = seg_flag_cor(pd.read_csv(path_in + entry), names)
        result.to_csv(path_out + entry, index=False)
Ejemplo n.º 5
0
def del_dup_batch(path_in, path_out):
    """Batch wrapper around ``del_dup``.

    Every CSV named by ``base.get_files_csv(path_in)`` is loaded from
    ``path_in``, handed to ``del_dup``, and the returned frame is saved
    to ``path_out`` under the same file name with ``index=False``.

    The directory strings are concatenated directly with the file name,
    so they are expected to end with a path separator.
    """
    for entry in base.get_files_csv(path_in):
        print("processing file: {}".format(entry))
        source_frame = pd.read_csv(path_in + entry)
        cleaned = del_dup(source_frame)
        cleaned.to_csv(path_out + entry, index=False)
Ejemplo n.º 6
0
def sm_mean_1m_batch(path_in, path_out):
    """Run ``sm_mean_1m`` on every CSV file listed for ``path_in``.

    For each file name returned by ``base.get_files_csv(path_in)`` the
    CSV is read, passed to ``sm_mean_1m``, and the result is written to
    ``path_out`` under the same name without the index column.

    Paths are built with ``+``, so both directories are expected to end
    with a path separator.
    """
    for csv_name in base.get_files_csv(path_in):
        print("-------- processing file: {}".format(csv_name))
        smoothed = sm_mean_1m(pd.read_csv(path_in + csv_name))
        smoothed.to_csv(path_out + csv_name, index=False)
Ejemplo n.º 7
0
def cal_rolling_path(path_in, path_out, func, win_rate):
    """Call ``cal_rolling_stats`` once per CSV file listed for ``path_in``.

    For every file name returned by ``base.get_files_csv(path_in)`` the
    input path ``path_in + name`` and the output path ``path_out + name``
    are forwarded to ``cal_rolling_stats`` together with ``func`` and
    ``win_rate``. Reading and writing are left to ``cal_rolling_stats``.

    Paths are built with ``+``, so both directories are expected to end
    with a path separator.
    """
    for csv_name in base.get_files_csv(path_in):
        print("caculate for file {}".format(csv_name))
        source = path_in + csv_name
        target = path_out + csv_name
        cal_rolling_stats(file_in=source, file_out=target,
                          func=func, win_rate=win_rate)
def cal_rolling_path(path_in, path_out, func, win_rate):
    """Batch driver for ``cal_rolling_stats``.

    Walks the file names given by ``base.get_files_csv(path_in)`` and,
    for each one, invokes ``cal_rolling_stats`` with the concatenated
    input and output paths plus the unchanged ``func`` and ``win_rate``
    arguments.

    ``path_in`` and ``path_out`` are joined to the file name with ``+``,
    so they are expected to end with a path separator.
    """
    for entry in base.get_files_csv(path_in):
        print("caculate for file {}".format(entry))
        cal_rolling_stats(
            file_in=path_in + entry,
            file_out=path_out + entry,
            func=func,
            win_rate=win_rate,
        )
def plot_box_ZX_HW_batch(path):
    """Draw one box plot per CSV file listed for ``path``.

    The plotted columns are ``var_name.name_HW1 + var_name.name_HW2``.
    For every file name returned by ``base.get_files_csv(path)`` a new
    13x2 figure is created, the selected columns are drawn with
    ``plt.boxplot``, the x ticks are labelled with the column names and
    the file name is used as the title. Figures are neither shown nor
    saved here.

    ``path`` is joined to the file name with ``+``, so it is expected to
    end with a path separator.
    """
    columns = var_name.name_HW1 + var_name.name_HW2
    # Box positions start at 1, hence the 1-based tick locations.
    tick_positions = np.arange(1, 1 + len(columns))
    for csv_name in base.get_files_csv(path):
        plt.figure(figsize=(13, 2))
        frame = pd.read_csv(path + csv_name)
        # ``DataFrame.as_matrix()`` was deprecated in pandas 0.23 and
        # removed in 1.0; ``.values`` yields the same ndarray on every
        # pandas version.
        plt.boxplot(frame[columns].values)
        plt.xticks(tick_positions, columns)
        plt.title(csv_name)
def plot_box_ZX_HW_batch(path):
    """Create a box-plot figure for each CSV file listed for ``path``.

    Columns ``var_name.name_HW1 + var_name.name_HW2`` of every file named
    by ``base.get_files_csv(path)`` are drawn with ``plt.boxplot`` on a
    fresh 13x2 figure; ticks carry the column names and the title is the
    file name. The function only builds the figures; it does not call
    ``plt.show`` or save anything.

    ``path`` is concatenated directly with the file name, so it is
    expected to end with a path separator.
    """
    name = var_name.name_HW1 + var_name.name_HW2
    for file in base.get_files_csv(path):
        plt.figure(figsize=(13, 2))
        data = pd.read_csv(path + file)
        # ``as_matrix()`` no longer exists in pandas >= 1.0 (deprecated
        # since 0.23); ``.values`` is the drop-in replacement.
        plt.boxplot(data[name].values)
        # Boxes are placed at positions 1..len(name).
        plt.xticks(np.arange(1, 1 + len(name)), name)
        plt.title(file)
Ejemplo n.º 11
0
def parse_time_batch(path_in, path_out):
    """Add a parsed ``BTSJ_I`` column to every CSV listed for ``path_in``.

    For each file name from ``base.get_files_csv(path_in)`` the ``BTSJ``
    column is converted to a list and passed to ``parse_time``; the
    result is stored as column ``BTSJ_I``, which is then moved to the
    second position (index 1). The reordered frame is written to
    ``path_out`` under the same file name without the index column.

    Paths are built with ``+``, so both directories are expected to end
    with a path separator.
    """
    for csv_name in base.get_files_csv(path_in):
        print("process file: {}".format(csv_name))
        frame = pd.read_csv(path_in + csv_name)
        frame['BTSJ_I'] = parse_time(list(frame['BTSJ']))
        # Move the new column from wherever it sits to index 1.
        ordered = list(frame.columns)
        ordered.remove('BTSJ_I')
        ordered = ordered[:1] + ['BTSJ_I'] + ordered[1:]
        frame = frame[ordered]
        frame.to_csv(path_out + csv_name, index=False)
Ejemplo n.º 12
0
def parse_temp_batch(path_in, path_out):
    """Run the temperature parsers on every CSV listed for ``path_in``.

    Files with fewer than 1000 rows are reported and skipped (nothing is
    written for them). All other files are passed through
    ``parse_temperature`` and then ``parse_temp_HW``; the result is saved
    to ``path_out`` under the same file name without the index column.

    Paths are built with ``+``, so both directories are expected to end
    with a path separator.
    """
    for csv_name in base.get_files_csv(path_in):
        print("processing file {}".format(csv_name))
        frame = pd.read_csv(path_in + csv_name)
        if len(frame) >= 1000:
            parsed = parse_temp_HW(parse_temperature(frame))
            parsed.to_csv(path_out + csv_name, index=False)
        else:
            # Too few records: report and move on to the next file.
            print("!!!!!! without parsing file:{} because the number of records too small!".format(csv_name))
Ejemplo n.º 13
0
def parse_time_batch(path_in, path_out):
    """Batch wrapper that inserts a ``BTSJ_I`` column built by ``parse_time``.

    Every CSV named by ``base.get_files_csv(path_in)`` is loaded; the
    values of its ``BTSJ`` column are given to ``parse_time`` as a list
    and the return value becomes column ``BTSJ_I``. That column is placed
    at index 1 of the column order before the frame is written to
    ``path_out`` (same file name, ``index=False``).

    The directory strings are concatenated directly with the file name,
    so they are expected to end with a path separator.
    """
    new_column = 'BTSJ_I'
    for entry in base.get_files_csv(path_in):
        print("process file: {}".format(entry))
        table = pd.read_csv(path_in + entry)
        table[new_column] = parse_time(list(table['BTSJ']))
        column_order = list(table.columns)
        column_order.remove(new_column)
        column_order.insert(1, new_column)
        table[column_order].to_csv(path_out + entry, index=False)
Ejemplo n.º 14
0
def plot_stats_ZX_HW_batch(path, func):
    """Plot ``func`` applied to the HW columns of every CSV in ``path``.

    A single 15x5 figure is created. For each file name returned by
    ``base.get_files_csv(path)`` the columns
    ``var_name.name_HW1 + var_name.name_HW2`` are selected and ``func``
    is applied via ``DataFrame.apply``; the resulting values are drawn
    as one ``'o-'`` line labelled with the file name, and the x ticks
    are set from the result's index.

    The title is always ``"std of ZH_HW"`` regardless of ``func``.
    ``path`` is joined to the file name with ``+``, so it is expected to
    end with a path separator.
    """
    plt.figure(figsize=(15, 5))
    columns = var_name.name_HW1 + var_name.name_HW2
    for csv_name in base.get_files_csv(path):
        frame = pd.read_csv(path + csv_name)
        stats = frame[columns].apply(func)
        plt.plot(stats.values, 'o-', label=csv_name)
        tick_positions = np.arange(stats.count())
        plt.xticks(tick_positions, stats.index)

    plt.legend(loc='best', prop={'size': 8})
    plt.title("std of ZH_HW")
Ejemplo n.º 15
0
def check_bug_batch(path_in):
    """Run ``check_bug`` on every CSV in ``path_in`` and log its output.

    While the batch runs, ``sys.stdout`` is redirected to the file
    ``path_in + "check_bug_log.txt"`` (opened in write mode), so the
    separator lines printed here and anything ``check_bug`` prints end
    up in that log.

    The redirection is undone and the log file is closed even when
    ``check_bug`` raises; previously an exception left ``sys.stdout``
    pointing at the log file and the handle open.

    ``path_in`` is joined to file names with ``+``, so it is expected to
    end with a path separator.
    """
    filelist = base.get_files_csv(path_in)
    stdout_origin = sys.stdout
    with open(path_in + "check_bug_log.txt", "w") as f_log:
        sys.stdout = f_log
        try:
            for file in filelist:
                print("--------------- file: {}".format(file))
                check_bug(path_in + file)
                print("---------------\n\n")
        finally:
            # Restore the real stdout before the log file is closed.
            sys.stdout = stdout_origin
def check_seg_flag_batch(path, log_dir):
    """Run ``check_seg_flag`` on every CSV in ``path`` and log its output.

    ``log_dir`` is, despite its name, the path of the log *file*: it is
    opened in write mode and ``sys.stdout`` is redirected to it for the
    duration of the batch, so the banner lines printed here and anything
    ``check_seg_flag`` prints are captured.

    The redirection is undone and the log file is closed even when
    ``check_seg_flag`` raises; previously an exception left
    ``sys.stdout`` pointing at the log file and the handle open.

    ``path`` is joined to file names with ``+``, so it is expected to end
    with a path separator.
    """
    filelist = base.get_files_csv(path)
    stdout_origin = sys.stdout
    with open(log_dir, "w") as f_log:
        sys.stdout = f_log
        try:
            for file in filelist:
                print("**************** file:{} ***************".format(file))
                check_seg_flag(path + file)
                print("****************************************\t")
        finally:
            # Restore the real stdout before the log file is closed.
            sys.stdout = stdout_origin
Ejemplo n.º 17
0
def check_seg_flag_batch(path, log_dir):
    """Batch-run ``check_seg_flag`` with stdout captured in a log file.

    Every file name from ``base.get_files_csv(path)`` is checked with
    ``check_seg_flag(path + name)``, framed by banner lines. During the
    run ``sys.stdout`` points at the file opened from ``log_dir`` (a file
    path, opened with mode ``"w"``).

    ``try``/``finally`` guarantees that the original ``sys.stdout`` is
    put back and the log is closed if a check raises; the earlier code
    skipped both steps on an exception.

    ``path`` is concatenated directly with the file name, so it is
    expected to end with a path separator.
    """
    filelist = base.get_files_csv(path)
    stdout_origin = sys.stdout
    f_log = open(log_dir, "w")
    try:
        sys.stdout = f_log
        for file in filelist:
            print("**************** file:{} ***************".format(file))
            check_seg_flag(path + file)
            print("****************************************\t")
    finally:
        # Always undo the redirection, then release the file handle.
        sys.stdout = stdout_origin
        f_log.close()
Ejemplo n.º 18
0
def parse_temp_batch(path_in, path_out):
    """Batch wrapper around ``parse_temperature`` and ``parse_temp_HW``.

    Each CSV named by ``base.get_files_csv(path_in)`` is loaded. Files
    with fewer than 1000 rows only trigger a warning message and are
    skipped; the others are processed by ``parse_temperature`` followed
    by ``parse_temp_HW`` and written to ``path_out`` under the same file
    name with ``index=False``.

    The directory strings are concatenated directly with the file name,
    so they are expected to end with a path separator.
    """
    for entry in base.get_files_csv(path_in):
        print("processing file {}".format(entry))
        table = pd.read_csv(path_in + entry)
        too_small = len(table) < 1000
        if too_small:
            # Not enough records to be worth parsing; skip this file.
            print(
                "!!!!!! without parsing file:{} because the number of records too small!"
                .format(entry))
            continue
        table = parse_temperature(table)
        table = parse_temp_HW(table)
        table.to_csv(path_out + entry, index=False)
def plot_stats_ZX_HW_batch(path, func):
    """Overlay per-file statistics of the HW columns in one figure.

    On a single 15x5 figure, one ``'o-'`` line is drawn per CSV file
    named by ``base.get_files_csv(path)``. The line holds the values
    obtained by applying ``func`` (through ``DataFrame.apply``) to the
    columns ``var_name.name_HW1 + var_name.name_HW2``; the file name is
    the legend label and the result's index provides the tick labels.

    The title is fixed to ``"std of ZH_HW"`` whatever ``func`` is.
    ``path`` is concatenated directly with the file name, so it is
    expected to end with a path separator.
    """
    plt.figure(figsize=(15, 5))
    selected = var_name.name_HW1 + var_name.name_HW2
    for entry in base.get_files_csv(path):
        per_column = pd.read_csv(path + entry)[selected].apply(func)
        plt.plot(per_column.values, 'o-', label=entry)
        plt.xticks(np.arange(per_column.count()), per_column.index)

    plt.legend(loc='best', prop={'size': 8})
    plt.title("std of ZH_HW")
Ejemplo n.º 20
0
def check_bug_batch(path_in):
    """Batch-run ``check_bug`` with stdout captured in a log file.

    Every file name from ``base.get_files_csv(path_in)`` is checked with
    ``check_bug(path_in + name)``, framed by separator lines. During the
    run ``sys.stdout`` points at ``path_in + "check_bug_log.txt"``,
    which is opened in write mode.

    ``try``/``finally`` guarantees that the log is closed and the
    original ``sys.stdout`` is put back if a check raises; the earlier
    code skipped both steps on an exception.

    ``path_in`` is concatenated directly with file names, so it is
    expected to end with a path separator.
    """
    filelist = base.get_files_csv(path_in)
    f_log = open(path_in + "check_bug_log.txt", "w")
    stdout_origin = sys.stdout
    try:
        sys.stdout = f_log
        for file in filelist:
            print("--------------- file: {}".format(file))
            check_bug(path_in + file)
            print("---------------\n\n")
    finally:
        # Always undo the redirection, then release the file handle.
        sys.stdout = stdout_origin
        f_log.close()
Ejemplo n.º 21
0
def cluster_batch(data_dir):
    """Run ``cluster`` on every CSV in ``data_dir`` and log the results.

    For each file name returned by ``base.get_files_csv(data_dir)`` the
    CSV is loaded, its ``BTSJ`` column is dropped, and the remaining
    frame is passed to ``cluster``. A banner, the file name and the
    value returned by ``cluster`` are printed while ``sys.stdout`` is
    redirected to ``data_dir + "dtw_cluster.txt"``.

    The stdout restore and the log close used to sit inside the loop, so
    only the first file was logged and the rest went to the console.
    They now run once after the loop, and also when an exception is
    raised.

    ``data_dir`` is joined to file names with ``+``, so it is expected to
    end with a path separator.
    """
    filelist = base.get_files_csv(data_dir)
    stdout_original = sys.stdout
    with open(data_dir + "dtw_cluster.txt", "w") as log_file:
        sys.stdout = log_file
        try:
            for file in filelist:
                data = pd.read_csv(data_dir + file)
                del data['BTSJ']
                clust = cluster(data)
                print("-------------------------------------------------------")
                print("********process file: {}*********".format(file))
                print(clust)
        finally:
            # Restore the real stdout before the log file is closed.
            sys.stdout = stdout_original
Ejemplo n.º 22
0
def cluster_batch(data_dir):
    """Cluster every CSV in ``data_dir`` and write the results to a log.

    Each file named by ``base.get_files_csv(data_dir)`` is read, the
    ``BTSJ`` column is deleted, and ``cluster`` is called on what is
    left. The banner lines and the value returned by ``cluster`` are
    printed with ``sys.stdout`` pointing at
    ``data_dir + "dtw_cluster.txt"`` (opened in write mode).

    Earlier, ``sys.stdout`` was restored and the log closed at the end of
    the *first* loop iteration, so later files were printed to the
    console instead of the log. Cleanup now happens once, in a
    ``finally`` clause after the loop.

    ``data_dir`` is concatenated directly with file names, so it is
    expected to end with a path separator.
    """
    filelist = base.get_files_csv(data_dir)
    stdout_original = sys.stdout
    log_file = open(data_dir + "dtw_cluster.txt", "w")
    try:
        sys.stdout = log_file
        for file in filelist:
            data = pd.read_csv(data_dir + file)
            del data['BTSJ']
            clust = cluster(data)
            print("-------------------------------------------------------")
            print("********process file: {}*********".format(file))
            print(clust)
    finally:
        # Always undo the redirection, then release the file handle.
        sys.stdout = stdout_original
        log_file.close()
Ejemplo n.º 23
0
import pandas as pd
import os

from preprocess import base
from ffx_learn import learn_ffx
from ffx_learn import call_ffx


def norm_2(x):
    """Center ``x`` on its mean and divide by its range (max - min).

    ``x`` must provide ``mean()``, ``max()`` and ``min()`` and support
    element-wise arithmetic (e.g. a NumPy array or a pandas Series).
    The mean is taken with ``x.mean()`` rather than ``np.mean(x)``
    because this script never imports NumPy, so the ``np`` reference
    raised ``NameError``.

    No guard exists for a constant input: when ``max == min`` the
    division is by zero.
    """
    return (x - x.mean()) / (x.max() - x.min())


# Exploratory script body: runs at import time and loads the first CSV
# found in the data directory below the current working directory.
root_dir = os.getcwd()
data_dir = root_dir + "/data_0134/integrated_temp_sdmc_mean/"
filelist = base.get_files_csv(data_dir)
print(filelist)

# Only the first listed file is inspected; an empty listing raises
# IndexError here.
file = filelist[0]
data = pd.read_csv(data_dir + file)
# Drop the BTSJ column before looking at the remaining columns.
del data['BTSJ']
print(data.columns)
# Disabled experiments kept for reference: row-wise normalisation with
# norm_2, a quick plot of the frame, and preparation of X / y arrays.
#data = data.apply(norm_2, axis=1)
# ax = data.plot()
# ax.set_title(file)
# #ax.set_ylim([-1, 1])
# plt.show()

# varnames = data.columns
# X = data.as_matrix()
# y = np.zeros(X.shape[0])
Ejemplo n.º 24
0
import numpy as np
import pandas as pd
import os

from preprocess import base
from ffx_learn import learn_ffx
from ffx_learn import call_ffx


def norm_2(x):
    """Center ``x`` on its mean and divide by its range (max - min).

    ``x`` must provide ``max()`` and ``min()`` and support element-wise
    arithmetic (e.g. a NumPy array or a pandas Series). No guard exists
    for a constant input: when ``max == min`` the division is by zero.
    """
    centered = x - np.mean(x)
    value_range = x.max() - x.min()
    return centered / value_range

# Exploratory script body: runs at import time and loads the first CSV
# found in the data directory below the current working directory.
root_dir = os.getcwd()
data_dir = root_dir + "/data_0134/integrated_temp_sdmc_mean/"
filelist = base.get_files_csv(data_dir)
print(filelist)

# Only the first listed file is inspected; an empty listing raises
# IndexError here.
file = filelist[0]
data = pd.read_csv(data_dir+file)
# Drop the BTSJ column before looking at the remaining columns.
del data['BTSJ']
print(data.columns)
# Disabled experiments kept for reference: row-wise normalisation with
# norm_2, a quick plot of the frame, and preparation of X / y arrays.
#data = data.apply(norm_2, axis=1)
# ax = data.plot()
# ax.set_title(file)
# #ax.set_ylim([-1, 1])
# plt.show()

# varnames = data.columns
# X = data.as_matrix()
# y = np.zeros(X.shape[0])