def load_data(root, name):
    """Load the pickled training and validation splits and preprocess them.

    :param root: dataset root directory containing 'training' and
        'validation' subdirectories
    :param name: suffix selecting the training pickle
        ('trains_sets_<name>.pkl')
    :return: tuple (X_train_all, Y_train_all, X_test, Y_test)
    """
    def _load_pickle(*parts):
        # Join the path components and unpickle a single DataFrame.
        with open(os.path.join(*parts), 'rb') as f:
            return pickle.load(f)

    df_train = _load_pickle(root, 'training', 'trains_sets_' + name + '.pkl')
    print(df_train)
    df_test_fea = _load_pickle(root, 'validation', 'val_sets_v1.pkl')
    print(df_test_fea)
    df_test_label = _load_pickle(root, 'validation', 'val_labels_v1.pkl')
    print(df_test_label)

    # Columns 0..2599 hold the spectrum; training column 2600 is the label.
    X_train_all = df_train.iloc[:, 0:2600].values
    Y_train_all = df_train.iloc[:, 2600:2601].values.reshape(-1)
    X_test = df_test_fea.iloc[:, 0:2600].values
    # Label file: column 1 holds the class label
    # (column 0 is presumably a sample id — TODO confirm against the pickle).
    Y_test = df_test_label.iloc[:, 1:2].values.reshape(-1)

    # Identical preprocessing for both splits:
    # unit standardization followed by Savitzky-Golay smoothing.
    X_train_all = savgol_smooth(standa(X_train_all, method='unit'))
    X_test = savgol_smooth(standa(X_test, method='unit'))
    return X_train_all, Y_train_all, X_test, Y_test
def preprocess_x1(features):
    """Build the X_1 feature set from raw spectra.

    Pipeline: unit standardization -> Savitzky-Golay smoothing
    (window 7, order 3) -> per-interval statistics over 50 bins.

    :param features: raw feature matrix
    :return: X_1 feature matrix
    """
    print("Preprocess X1")
    # Work on a deep copy so the caller's array is left untouched.
    smoothed = standa(copy.deepcopy(features), method="unit")
    smoothed = savgol_filter(smoothed, window_length=7, polyorder=3)
    intervals = interval_stat(smoothed, 50)
    print("X1 shape:", intervals.shape)
    return intervals
import numpy as np
from preprocess.interval import load_df
from preprocess.standardize import standa


def remove_abnormal(x_train: np.ndarray) -> np.ndarray:
    """Repair per-column 6-sigma outliers by neighbour averaging.

    For every sample row, any interior value lying outside
    [mean - 6*sigma, mean + 6*sigma] (statistics taken per column over all
    rows) is replaced with the mean of its left and right neighbours.
    Mutates *x_train* in place and also returns it.

    :param x_train: 2-D feature matrix (rows = samples, cols = features)
    :return: the same array, with outliers smoothed over
    """
    n_rows, n_cols = x_train.shape
    mu = x_train.mean(axis=0)
    sd = x_train.std(axis=0)
    above_low = mu - 6 * sd < x_train
    below_high = x_train < mu + 6 * sd  # type: np.ndarray
    for r in range(n_rows):
        # Fast path: the whole row is already within bounds.
        if above_low[r].all() and below_high[r].all():
            continue
        # NOTE(review): boundary columns (0 and n_cols-1) are never
        # repaired — presumably deliberate since they lack two neighbours.
        for c in range(1, n_cols - 1):
            if not (above_low[r][c] and below_high[r][c]):
                x_train[r][c] = (x_train[r][c - 1] + x_train[r][c + 1]) / 2
    return x_train


if __name__ == '__main__':
    root = "/mnt/data3/caojh/dataset/AstroData/"
    train_file = 'trains_sets_correct.pkl'
    test_file = 'val_labels_v1.pkl'
    X_train, Y_train = load_df(root + 'training', train_file)  # type: (np.ndarray, np.ndarray)
    X_train = standa(X_train)
    X_train = remove_abnormal(X_train)
        # Tail of an interval-statistics routine whose `def` lies above this
        # chunk: each iteration summarizes one column interval of `feature`
        # with 8 statistics per interval.
        block = pd.DataFrame(feature[:, i * step:stop])
        # Row-wise (axis=1) moments over the interval.
        means[:, i], theta[:, i] = block.mean(1), block.var(1)
        skews[:, i], kurts[:, i] = block.skew(1), block.kurt(1)
        # Positions and values of the interval extrema.
        argmaxs[:, i], argmins[:, i] = block.idxmax(1), block.idxmin(1)
        maxs[:, i], mins[:, i] = block.max(1), block.min(1)
    # Stack all 8 statistic matrices side by side along the column axis.
    return np.concatenate(
        (means, theta, skews, kurts, argmaxs, argmins, maxs, mins), 1)


# In[1]
if __name__ == '__main__':
    root = "/mnt/data3/caojh/dataset/AstroData/"
    train_file = 'trains_sets_correct.pkl'
    test_file = 'val_labels_v1.pkl'
    # Training split: standardize, Savitzky-Golay smooth, interval stats.
    X_train, Y_train = load_df(root + 'training', train_file)
    X_train = standa(X_train)
    X_train = savgol_filter(X_train, window_length=7, polyorder=3)
    X_train = interval_stat(X_train, 50)
    # Validation split: identical preprocessing pipeline.
    fea_file = root + 'validation/val_sets_v1.pkl'
    label_file = root + 'validation/val_labels_v1.pkl'
    X_test, Y_test = load_validation(fea_file, label_file)
    X_test = standa(X_test)
    X_test = savgol_filter(X_test, window_length=7, polyorder=3)
    X_test = interval_stat(X_test, 50)
    # In[1]
    # Fit on the training split, then evaluate on the validation split.
    clf = train((X_train, Y_train))
    test((X_test, Y_test), clf)
from preprocess.standardize import standa

# Predictions bucketed by minimum classifier confidence.
df_0 = pd.read_csv('test_minconfidence_0.csv')
df_1 = pd.read_csv('test_minconfidence_1.csv')
df_2 = pd.read_csv('test_minconfidence_2.csv')

with open('/mnt/data3/caojh/dataset/AstroData/test/test_sets.pkl', 'rb') as f:
    df_test_fea = pickle.load(f)
print(df_test_fea)

# Columns 0..2599 hold the spectrum values.
X_test = df_test_fea.iloc[:, 0:2600].values
X_test = standa(X_test, method='unit')
print('standa')
X_test = savgol_smooth(X_test)
print('smooth')

# Cache the preprocessed features; protocol 4 supports objects > 4 GB.
with open("X_test.pkl", 'wb') as f:
    pickle.dump(X_test, f, protocol=4)
print("X_test saved")

# Plot each lowest-confidence spectrum for visual inspection.
# Iterate the index column directly instead of a C-style range(len(...)) loop.
for index in df_0['index']:
    plt.figure()
    feature = X_test[index].reshape(-1)
    print(index, feature)
    assert feature.shape == (2600, )
    plt.plot(range(2600), feature)