import copy
import os
import pickle

from scipy.signal import savgol_filter

from preprocess.standardize import standa
# savgol_smooth and interval_stat are project helpers used below; their import
# paths are not shown in this snippet.


def load_data(root, name):
    # Load the training pickle selected by `name` plus the v1 validation
    # feature and label pickles.
    with open(os.path.join(root, 'training',
                           'trains_sets_' + name + '.pkl'), 'rb') as f:
        df_train = pickle.load(f)
        print(df_train)
    with open(os.path.join(root, 'validation', 'val_sets_v1.pkl'), 'rb') as f:
        df_test_fea = pickle.load(f)
        print(df_test_fea)
    with open(os.path.join(root, 'validation', 'val_labels_v1.pkl'), 'rb') as f:
        df_test_label = pickle.load(f)
        print(df_test_label)

    # Columns 0..2599 are the feature vector; column 2600 of the training frame
    # and column 1 of the label frame carry the class label.
    X_train_all = df_train.iloc[:, 0:2600].values
    Y_train_all = df_train.iloc[:, 2600:2601].values.reshape(-1)
    X_test = df_test_fea.iloc[:, 0:2600].values
    Y_test = df_test_label.iloc[:, 1:2].values.reshape(-1)

    # Standardize (method='unit') and Savitzky-Golay-smooth both feature matrices.
    X_train_all = standa(X_train_all, method='unit')
    X_train_all = savgol_smooth(X_train_all)
    X_test = standa(X_test, method='unit')
    X_test = savgol_smooth(X_test)

    return X_train_all, Y_train_all, X_test, Y_test


def preprocess_x1(features):
    """
    Preprocess X_1: standardize + Savitzky-Golay smoothing + interval statistics.
    :param features: raw features
    :return: X_1 features
    """
    print("Preprocess X1")
    x_raw = copy.deepcopy(features)
    x1 = standa(x_raw, method="unit")
    # x1 = remove_abnormal(x1)

    x1 = savgol_filter(x1, window_length=7, polyorder=3)
    x1 = interval_stat(x1, 50)
    print("X1 shape:", x1.shape)
    return x1
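

# Hedged usage sketch: run load_data and preprocess_x1 end to end. The dataset
# root and name='correct' (selecting trains_sets_correct.pkl) are taken from the
# other snippets in this collection; load_data already smooths its output, so
# this only illustrates the call pattern, not a tuned pipeline.
if __name__ == '__main__':
    root = "/mnt/data3/caojh/dataset/AstroData/"
    X_train_all, Y_train_all, X_test, Y_test = load_data(root, 'correct')
    X1_train = preprocess_x1(X_train_all)
    X1_test = preprocess_x1(X_test)
    print(X1_train.shape, X1_test.shape)   # 8 interval statistics per 50-column block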
import numpy as np

from preprocess.interval import load_df
from preprocess.standardize import standa


def remove_abnormal(x_train: np.ndarray) -> np.ndarray:
    """Replace per-column outliers (outside mean ± 6 * sigma) with the average
    of their left and right neighbours in the same row; the first and last
    columns are never replaced. Works on x_train in place and also returns it."""
    rows, columns = x_train.shape

    mean, sigma = x_train.mean(axis=0), x_train.std(axis=0)
    cond1 = (mean - 6 * sigma < x_train)
    cond2 = (x_train < mean + 6 * sigma)  # type: np.ndarray
    for i in range(rows):
        if cond1[i].all() and cond2[i].all():
            continue
        for j in range(1, columns - 1):
            if not (cond1[i][j] and cond2[i][j]):
                x_train[i][j] = (x_train[i][j - 1] + x_train[i][j + 1]) / 2
    return x_train


if __name__ == '__main__':
    root = "/mnt/data3/caojh/dataset/AstroData/"
    train_file = 'trains_sets_correct.pkl'
    test_file = 'val_labels_v1.pkl'
    X_train, Y_train = load_df(root + 'training',
                               train_file)  # type: (np.ndarray, np.ndarray)
    X_train = standa(X_train)
    X_train = remove_abnormal(X_train)
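

# Hedged synthetic sketch of remove_abnormal's effect (values are illustrative
# only): a single spike far outside mean +/- 6 * sigma is replaced by the
# average of its two horizontal neighbours.
if __name__ == '__main__':
    demo = np.random.default_rng(0).normal(loc=1.0, scale=0.01, size=(100, 5))
    demo[0, 2] = 1000.0                        # inject an obvious outlier
    cleaned = remove_abnormal(demo.copy())
    print(demo[0, 2], '->', cleaned[0, 2])     # roughly (demo[0, 1] + demo[0, 3]) / 2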
import numpy as np
import pandas as pd


def interval_stat(feature, step):
    # Truncated in the original snippet: the signature, array set-up and loop
    # header are reconstructed from the loop body and the call sites interval_stat(X, 50).
    rows, columns = feature.shape
    n = (columns + step - 1) // step        # number of `step`-wide column blocks
    means, theta, skews, kurts, argmaxs, argmins, maxs, mins = (
        np.zeros((rows, n)) for _ in range(8))
    for i in range(n):
        stop = min((i + 1) * step, columns)
        block = pd.DataFrame(feature[:, i * step:stop])
        means[:, i], theta[:, i] = block.mean(1), block.var(1)
        skews[:, i], kurts[:, i] = block.skew(1), block.kurt(1)
        argmaxs[:, i], argmins[:, i] = block.idxmax(1), block.idxmin(1)
        maxs[:, i], mins[:, i] = block.max(1), block.min(1)
    return np.concatenate(
        (means, theta, skews, kurts, argmaxs, argmins, maxs, mins), 1)
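

# Sanity sketch for interval_stat as reconstructed above, on synthetic data:
# with step=50, 2600 columns give 52 blocks and 52 * 8 = 416 statistic columns.
print(interval_stat(np.random.rand(3, 2600), 50).shape)   # -> (3, 416)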


# In[1]

if __name__ == '__main__':
    # standa and load_df are imported above from preprocess.*; savgol_filter comes
    # from scipy.signal; load_validation, train and test are project helpers whose
    # definitions are not included in this snippet.
    root = "/mnt/data3/caojh/dataset/AstroData/"
    train_file = 'trains_sets_correct.pkl'
    test_file = 'val_labels_v1.pkl'
    X_train, Y_train = load_df(root + 'training', train_file)
    X_train = standa(X_train)
    X_train = savgol_filter(X_train, window_length=7, polyorder=3)
    X_train = interval_stat(X_train, 50)

    fea_file = root + 'validation/val_sets_v1.pkl'
    label_file = root + 'validation/val_labels_v1.pkl'
    X_test, Y_test = load_validation(fea_file, label_file)
    X_test = standa(X_test)
    X_test = savgol_filter(X_test, window_length=7, polyorder=3)
    X_test = interval_stat(X_test, 50)

    # In[1]
    clf = train((X_train, Y_train))
    test((X_test, Y_test), clf)
import pickle

import matplotlib.pyplot as plt
import pandas as pd

from preprocess.standardize import standa
# savgol_smooth is the project's smoothing helper; its import path is not shown here.

df_0 = pd.read_csv('test_minconfidence_0.csv')
df_1 = pd.read_csv('test_minconfidence_1.csv')
df_2 = pd.read_csv('test_minconfidence_2.csv')

with open('/mnt/data3/caojh/dataset/AstroData/test/test_sets.pkl', 'rb') as f:
    df_test_fea = pickle.load(f)
    print(df_test_fea)

# with open('X_test.pkl', 'rb') as f:
#     X_test = pickle.load(f)

X_test = df_test_fea.iloc[:, 0:2600].values

X_test = standa(X_test, method='unit')
print('standa')
X_test = savgol_smooth(X_test)
print('smooth')

with open("X_test.pkl", 'wb') as f:
    pickle.dump(X_test, f, protocol=4)
    print("X_test saved")

# Plot the preprocessed 2600-point feature curve for every test sample listed
# in test_minconfidence_0.csv.
for i in range(len(df_0)):
    plt.figure()
    index = df_0['index'][i]
    feature = X_test[index].reshape(-1)
    print(index, feature)
    assert feature.shape == (2600, )
    plt.plot(range(2600), feature)
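
# The loop above only builds the figures; outside a notebook they are never
# rendered, so an explicit show (or a per-figure savefig) is still needed.
plt.show()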