Example #1
import numpy as np
import pandas as pd

from prepare import readbunchobj


def coef():
    data = readbunchobj('data_set.data')
    cate_feature_col = ['gender', 'receipt_address', 'household_register']

    X = data.X_train
    y = data.y_train
    col = data.col
    x_num_col = [i for i in list(col) if i not in cate_feature_col]

    # prep = StandardScaler()
    # X = prep.fit_transform(X[x_num_col])
    # X = pd.DataFrame(data=X, columns=x_num_col)
    X = X[x_num_col]

    # Pearson correlation of each numeric feature with the target
    coef_list = []
    for c in X.columns:
        coef_ = np.corrcoef(X[c], y)
        coef_list.append([c, coef_[0][1]])
    coef_df = pd.DataFrame(coef_list, columns=['feature', 'corr'])
    return coef_df
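
For reference, the per-column loop inside coef collapses into a single vectorized call; a minimal sketch, assuming X is a numeric DataFrame and y aligns with its index:

corr = X.corrwith(pd.Series(y, index=X.index))  # Pearson correlation per column
coef_df = corr.reset_index()
coef_df.columns = ['feature', 'corr']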
Example #2

import time

import pandas as pd
from prepare import readbunchobj
from sklearn import metrics


def t_model(y_test, y_pred):
    c_m = metrics.confusion_matrix(y_test, y_pred)
    print('true negatives: {0}\nfalse negatives: {1}\ntrue positives: {2}\nfalse positives: {3}\n'.format(
        c_m[0][0], c_m[1][0], c_m[1][1], c_m[0][1]))
    print("recall: %.4f" % metrics.recall_score(y_test, y_pred))
    print("precision: %.4f" % metrics.precision_score(y_test, y_pred))
    print("F1: %.4f" % metrics.f1_score(y_test, y_pred))
    print("roc_auc: %.4f" % metrics.roc_auc_score(y_test, y_pred))
    # this is the recall-precision product, not the harmonic-mean F-measure
    print("recall*precision: %.4f" % (metrics.recall_score(y_test, y_pred) *
                                      metrics.precision_score(y_test, y_pred)))


if __name__ == '__main__':
    data = readbunchobj('dataset_woe.data')
    X_train = pd.DataFrame(data.X_train)
    X_test = data.X_test
    y_train = data.y_train
    y_test = data.y_test

    # categorical_features_indices = np.where((X_train.dtypes != np.float64) & (X_train.dtypes != np.int64))[0]  # indices of categorical features

    n = 100  # build 100 bootstrap subsamples
    start = time.time()
    clf_list, clf_score = bagging_boost_fit(X_train, y_train, n)
    end = time.time()
    print(end - start)  # elapsed training time in seconds
    y_pred = bagging_boost_predict(X_test, clf_list, clf_score)
    t_model(y_test, y_pred)
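
bagging_boost_fit and bagging_boost_predict are defined elsewhere in this repository. A minimal sketch of what such a pair could look like, assuming sklearn's AdaBoostClassifier as the boosted base learner and a score-weighted majority vote (the body, base learner, and weighting scheme are assumptions, not the repository's actual code):

import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.utils import resample


def bagging_boost_fit(X_train, y_train, n):
    """Fit n boosted classifiers, each on its own bootstrap subsample (assumed)."""
    clf_list, clf_score = [], []
    for _ in range(n):
        X_s, y_s = resample(X_train, y_train)  # bootstrap sample
        clf = AdaBoostClassifier().fit(X_s, y_s)
        clf_list.append(clf)
        clf_score.append(clf.score(X_s, y_s))  # in-sample accuracy as vote weight
    return clf_list, clf_score


def bagging_boost_predict(X_test, clf_list, clf_score):
    """Score-weighted vote over the ensemble's 0/1 predictions (assumed)."""
    votes = np.array([clf.predict(X_test) for clf in clf_list], dtype=float)
    weights = np.array(clf_score) / np.sum(clf_score)
    return (weights @ votes >= 0.5).astype(int)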
Example #3
from prepare import readbunchobj


def get_data():
    data = readbunchobj('data.data')
    return data
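
readbunchobj is imported from the project's prepare module (visible in Example #6). A minimal pickle-based sketch, assuming the .data files are pickled sklearn Bunch objects (the real implementation may differ):

import pickle


def readbunchobj(path):
    """Load a pickled Bunch (attribute-style dict: data.X_train, data.col, ...)."""
    with open(path, 'rb') as f:
        return pickle.load(f)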
Example #4
    x_num_col = [i for i in list(col) if i not in cate_feature_col]

    # prep = StandardScaler()
    # X = prep.fit_transform(X[x_num_col])
    # X = pd.DataFrame(data=X, columns=x_num_col)
    X = X[x_num_col]

    # Pearson correlation of each numeric feature with the target
    coef_list = []
    for c in X.columns:
        coef_ = np.corrcoef(X[c], y)
        coef_list.append([c, coef_[0][1]])
    coef_df = pd.DataFrame(coef_list, columns=['feature', 'corr'])
    return coef_df


if __name__ == '__main__':
    dataset = readbunchobj('data.data')
    data = dataset.data
    label = dataset.label

    # split sample indices by class label
    positive_index = label[label['label'] == 1].index.values
    negative_index = label[label['label'] == 0].index.values

    data_1 = data.loc[positive_index]
    data_0 = data.loc[negative_index]

    # fraction of missing values per column, shown as a horizontal bar chart
    miss_df = data.isna().sum() / len(data)
    plt.barh(miss_df.index, miss_df.values)
    plt.show()

    col = [
        'years', 'score', 'account_rank', 'deal_order_number',
        'avg_order_amount', 'max_pay_amount', 'last_consume_days',
Example #5
    return list(sum_y[0])


def test_model(y_test, y_pred):
    c_m = metrics.confusion_matrix(y_test, y_pred)
    print('true negatives: {0}\nfalse negatives: {1}\ntrue positives: {2}\nfalse positives: {3}\n'.format(
        c_m[0][0], c_m[1][0], c_m[1][1], c_m[0][1]))
    print("recall: %.4f" % metrics.recall_score(y_test, y_pred))
    print("precision: %.4f" % metrics.precision_score(y_test, y_pred))
    print("F1: %.4f" % metrics.f1_score(y_test, y_pred))
    print("roc_auc: %.4f" % metrics.roc_auc_score(y_test, y_pred))
    # this is the recall-precision product, not the harmonic-mean F-measure
    print("recall*precision: %.4f" % (metrics.recall_score(y_test, y_pred) *
                                      metrics.precision_score(y_test, y_pred)))


if __name__ == '__main__':
    data = readbunchobj('dataset.data')
    X_train = pd.DataFrame(data.X_train)
    X_test = data.X_test
    y_train = data.y_train
    y_test = data.y_test

    categorical_features_indices = np.where((X_train.dtypes != np.float64) & (
        X_train.dtypes != np.int64))[0]  # indices of categorical features

    n = 50  # build 50 bootstrap subsamples

    clf_list, clf_score = bagging_boost_fit(X_train, y_train, n)
    y_pred = bagging_boost_predict(X_test, clf_list, clf_score)
    test_model(y_test, y_pred)
Example #6
vae = Model(inputs, outputs, name='vae_mlp')

# reconstruction_loss = binary_crossentropy(inputs, outputs)
# reconstruction_loss *= original_dim
# kl_loss = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
# kl_loss = K.sum(kl_loss, axis=-1)
# kl_loss *= -0.5
# vae_loss = K.mean(reconstruction_loss + kl_loss)
# vae.add_loss(vae_loss)
# the model's output is expected to already be the loss tensor, so the compiled
# loss just passes y_pred through; alternatively, uncomment the add_loss block
# above and compile without a loss argument
vae.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)

# training data
import numpy as np
import pandas as pd
from prepare import readbunchobj
from sklearn.preprocessing import MinMaxScaler

data = readbunchobj('dataset_delstr.data')
x_train = np.array(data.X_train)
x_test = np.array(data.X_test)
y_train = data.y_train
y_test = data.y_test

scl = MinMaxScaler()
x_train = scl.fit_transform(x_train)
x_test = scl.transform(x_test)

train = pd.DataFrame(x_train)
train['target'] = y_train
train_0 = train[train['target'] == 0]  # majority class
train_1 = train[train['target'] == 1]  # minority class

resample_num = 2000  # number of minority-class samples to generate
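
The snippet ends right after setting resample_num. A minimal sketch of the likely next step, fitting the VAE on the minority class and sampling its decoder to synthesize new rows; decoder, latent_dim, and the fit hyperparameters are assumptions based on the standard Keras VAE layout this snippet follows:

x_min = train_1.drop(columns='target').values  # minority-class features only
vae.fit(x_min, x_min, epochs=50, batch_size=32)  # assumed training settings

z = np.random.normal(size=(resample_num, latent_dim))  # sample the N(0, 1) prior
x_new = decoder.predict(z)  # synthetic minority samples, still in [0, 1] scale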
Example #7
        x_train_transform = np.hstack((x_train_transform_sigmoid, x_train_transform_tanh))
        x_test_transform = np.hstack((x_test_transform_sigmoid, x_test_transform_tanh))

        return x_train_transform, x_test_transform


if __name__ == '__main__':
    import os

    os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # use GPU with ID=0
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5  # maximun alloc gpu50% of MEM
    config.gpu_options.allow_growth = True  # allocate dynamically

    data = readbunchobj('d:/py/credit_risk/dataset_delstr.data')
    Xtrain = np.array(data.X_train)
    Xtest = data.X_test
    y_train = data.y_train
    y_test = data.y_test
    prep = MinMaxScaler()
    Xtrain = prep.fit_transform(Xtrain)
    Xtest = prep.transform(Xtest)

    n, m = Xtrain.shape
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)  # unused here; would be passed to tf.Session
    list1 = [m, 21, 17, 9]  # network layer sizes: input dimension, then hidden widths
    eta = 0.01  # learning rate
    training_epochs = 30
    batch_size = 100
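
The snippet stops at the hyperparameters. A minimal sketch of the minibatch loop they would drive in TF1 style; x_ph and train_op stand in for the graph's input placeholder and training op and are assumptions, not names from the repository:

n_batches = n // batch_size
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(training_epochs):
        idx = np.random.permutation(n)  # reshuffle the training set each epoch
        for b in range(n_batches):
            batch = Xtrain[idx[b * batch_size:(b + 1) * batch_size]]
            sess.run(train_op, feed_dict={x_ph: batch})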