Ejemplo n.º 1
0
def data_process():
    """Build an imbalanced training set with sample weights and a test set.

    Loads ``f_data0`` / ``f_label`` from ``dataset.mat``, holds out 20% of the
    full data with ``train_test_split`` as the test set, draws the training
    subset with ``imbalanced_data`` and derives per-sample weights for it via
    ``create_class_weight`` / ``creat_sample_weight``.

    Side effect: writes the shuffled training labels (before
    ``to_categorical``) to ``y_train_i.txt`` in the working directory.

    Returns:
        tuple: ``(x_train_i, y_train_i, x_test_b, y_test_b, train_weight,
        ir_overall)`` where

        * ``x_train_i`` / ``y_train_i`` are the shuffled imbalanced training
          samples and their labels after ``to_categorical``;
        * ``x_test_b`` / ``y_test_b`` are the shuffled hold-out samples and
          labels (``to_categorical`` is NOT applied to ``y_test_b``);
        * ``train_weight`` is a column vector (``(n, 1)``) of sample weights
          in the same row order as ``x_train_i``;
        * ``ir_overall`` is the second value returned by
          ``creat_sample_weight``, passed through unchanged.
    """
    data = sio.loadmat('dataset.mat')
    x_data = data['f_data0']
    y_data = data['f_label']

    # Hold-out split of the full data set; only the test part is used.
    # NOTE(review): this split is drawn independently of the imbalanced
    # training subset below, so the two may share samples -- confirm that
    # this is intended.
    _, x_test_b, _, y_test_b = train_test_split(x_data, y_data, test_size=0.2)

    # Per-class setting handed to imbalanced_data (keys are presumably the
    # class labels -- verify against that helper).
    # Other presets used in experiments:
    #   {0: 50, 1: 20, 2: 20, 3: 20, 4: 5, 5: 5, 6: 5, 7: 2}
    #   {0: 50, 1: 10, 2: 10, 3: 10, 4: 3, 5: 3, 6: 3, 7: 1}
    imbalanced_dict = {0: 50, 1: 30, 2: 30, 3: 30, 4: 15, 5: 15, 6: 15, 7: 10}
    x_train_im, y_train_im, _, _, imbalanced_dict_1 = imbalanced_data(
        x_data, y_data, imbalanced_dict, refresh=False, seed=1)

    # Class weights and the per-sample weights derived from them.
    multi_class_weight = create_class_weight(imbalanced_dict_1)
    split = 0.6
    multi_sample_weight, ir_overall = creat_sample_weight(
        imbalanced_dict_1, multi_class_weight, split)

    # Shuffle samples, labels and weights with ONE permutation. Previously the
    # weights were shuffled by a second, separate shuffle_data call, which only
    # keeps them aligned with the samples if shuffle_data happens to produce
    # the same permutation on every call. Shuffling an index vector together
    # with x and reusing that order guarantees alignment either way.
    x_train_i, order = shuffle_data(x_train_im, np.arange(len(x_train_im)))
    order = np.asarray(order).astype(int).ravel()
    y_train_i = np.asarray(y_train_im)[order]
    train_weight = np.asarray(multi_sample_weight)[order]
    train_weight = train_weight.reshape((len(train_weight), 1))

    np.savetxt("y_train_i.txt", y_train_i)
    y_train_i = to_categorical(y_train_i)

    x_test_b, y_test_b = shuffle_data(x_test_b, y_test_b)

    return x_train_i, y_train_i, x_test_b, y_test_b, train_weight, ir_overall
Ejemplo n.º 2
0
def data_process():
    """Build a SMOTETomek-resampled training set and a hold-out test set.

    Loads ``f_data0`` / ``f_label`` from ``dataset.mat``, holds out 20% of the
    full data with ``train_test_split`` as the test set, draws the training
    subset with ``imbalanced_data`` and resamples it with
    ``SMOTETomek(random_state=0)``.

    Returns:
        tuple: ``(x_train_i, y_train_i, x_test_b, y_test_b)`` where
        ``x_train_i`` / ``y_train_i`` are the shuffled, resampled training
        samples and their labels after ``to_categorical``, and ``x_test_b`` /
        ``y_test_b`` are the shuffled hold-out samples and labels
        (``to_categorical`` is NOT applied to ``y_test_b``).
    """
    data = sio.loadmat('dataset.mat')
    x_data = data['f_data0']
    y_data = data['f_label']

    # Hold-out split of the full data set; only the test part is used.
    # NOTE(review): this split is drawn independently of the imbalanced
    # training subset below, so the two may share samples -- confirm that
    # this is intended.
    _, x_test_b, _, y_test_b = train_test_split(x_data, y_data, test_size=0.2)

    # Per-class setting handed to imbalanced_data (keys are presumably the
    # class labels -- verify against that helper).
    # Other presets used in experiments:
    #   {0: 50, 1: 30, 2: 30, 3: 30, 4: 15, 5: 15, 6: 15, 7: 10}
    #   {0: 50, 1: 10, 2: 10, 3: 10, 4: 3, 5: 3, 6: 3, 7: 1}
    imbalanced_dict = {0: 50, 1: 20, 2: 20, 3: 20, 4: 5, 5: 5, 6: 5, 7: 2}
    x_train_im, y_train_im, _, _, _ = imbalanced_data(
        x_data, y_data, imbalanced_dict, refresh=False, seed=1)

    # imbalanced-learn renamed fit_sample to fit_resample (deprecated in 0.4,
    # later removed). Prefer the current name and fall back to the old one so
    # the function works on both old and new installations.
    smote_tomek = SMOTETomek(random_state=0)
    resample = getattr(smote_tomek, "fit_resample", None)
    if resample is None:
        resample = smote_tomek.fit_sample
    x_train_im, y_train_im = resample(x_train_im, y_train_im)

    x_train_i, y_train_i = shuffle_data(x_train_im, y_train_im)
    y_train_i = to_categorical(y_train_i)

    x_test_b, y_test_b = shuffle_data(x_test_b, y_test_b)

    return x_train_i, y_train_i, x_test_b, y_test_b