Esempio n. 1
0
def test_WDL(sparse_feature_num, wide_feature_num):
    """Build a WDL model on generated test data and run ``check_model`` on it.

    ``sparse_feature_num`` is passed to ``get_test_data`` as both its second
    and third positional argument. ``wide_feature_num`` is accepted but not
    read by the body.
    """
    features, labels, columns = get_test_data(
        SAMPLE_SIZE, sparse_feature_num, sparse_feature_num)

    # The same column list is handed to WDL for both of its feature arguments.
    wdl = WDL(columns, columns, dnn_hidden_units=[32, 32], dnn_dropout=0.5)
    check_model(wdl, "WDL", features, labels)
Esempio n. 2
0
def test_WDL(sparse_feature_num, dense_feature_num):
    """Build a WDL model on hashed test data and run ``check_model`` on it.

    Returns immediately, without building anything, when the installed
    TensorFlow version is 2.0.0 or newer.
    """
    running_tf2 = version.parse(tf.__version__) >= version.parse('2.0.0')
    if running_tf2:
        return

    features, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num,
        hash_flag=True,
    )

    # The same column list is handed to WDL for both of its feature arguments.
    wdl = WDL(columns, columns, dnn_hidden_units=[4, 4], dnn_dropout=0.5)
    check_model(wdl, "WDL", features, labels)
Esempio n. 3
0
def test_WDL():
    """Train WDL on random data, then round-trip its weights and the full model.

    Steps: fit for one epoch with a 50% validation split, save and reload the
    weights, then save and reload the whole model. Files named
    ``WDL_weights.h5`` and ``WDL.h5`` are written to the working directory.
    """
    name = "WDL"

    sample_size = 64
    feature_dim_dict = {
        'sparse': {
            'sparse_1': 2,
            'sparse_2': 5,
            'sparse_3': 10
        },
        'dense': ['dense_1', 'dense_2', 'dense_3']
    }
    sparse_input = [
        np.random.randint(0, dim, sample_size)
        for dim in feature_dim_dict['sparse'].values()
    ]
    # Use a throwaway loop variable: reusing ``name`` here would overwrite the
    # model name on Python 2, where list-comprehension variables leak.
    dense_input = [
        np.random.random(sample_size) for _ in feature_dim_dict['dense']
    ]
    y = np.random.randint(0, 2, sample_size)
    x = sparse_input + dense_input

    model = WDL(feature_dim_dict,
                feature_dim_dict,
                hidden_size=[32, 32],
                keep_prob=0.5)
    model.compile('adam',
                  'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    # The input list is passed twice; presumably one copy per feature dict
    # given to WDL above -- confirm against the WDL input layout.
    model.fit(x + x, y, batch_size=100, epochs=1, validation_split=0.5)
    print(name + " test train valid pass!")
    model.save_weights(name + '_weights.h5')
    model.load_weights(name + '_weights.h5')
    print(name + " test save load weight pass!")
    save_model(model, name + '.h5')
    model = load_model(name + '.h5', custom_objects)
    print(name + " test save load model pass!")

    print(name + " test pass!")
Esempio n. 4
0
def test_WDL(sparse_feature_num, wide_feature_num):
    """Build WDL from separate deep and wide ``SingleFeat`` specs and check it.

    The deep spec holds ``sparse_feature_num`` sparse and ``sparse_feature_num``
    dense features; the wide spec holds ``wide_feature_num`` of each. Sparse
    features get a random dimension drawn from ``np.random.randint(1, 10)``,
    dense features get dimension 0.
    """
    n_samples = SAMPLE_SIZE

    def _make_spec(tag, count):
        # Sparse entries are built first so the random draws happen in the
        # same order as feature creation.
        sparse_feats = [
            SingleFeat("sparse" + tag + str(i), np.random.randint(1, 10))
            for i in range(count)
        ]
        dense_feats = [
            SingleFeat("dense" + tag + str(i), 0) for i in range(count)
        ]
        return {"sparse": sparse_feats, 'dense': dense_feats}

    def _draw_inputs(spec):
        # One integer array per sparse feature, then one float array per
        # dense feature.
        sparse_part = [
            np.random.randint(0, feat.dimension, n_samples)
            for feat in spec['sparse']
        ]
        dense_part = [np.random.random(n_samples) for _ in spec['dense']]
        return sparse_part + dense_part

    feature_dim_dict = _make_spec('_', sparse_feature_num)
    wide_feature_dim_dict = _make_spec('wide_', wide_feature_num)

    x = _draw_inputs(feature_dim_dict)
    x_wide = _draw_inputs(wide_feature_dim_dict)
    y = np.random.randint(0, 2, n_samples)

    model = WDL(feature_dim_dict,
                wide_feature_dim_dict,
                dnn_hidden_units=[32, 32],
                dnn_dropout=0.5)
    check_model(model, "WDL", x + x_wide, y)
Esempio n. 5
0
def test_WDL(sparse_feature_num, wide_feature_num):
    """Build WDL from separate deep and wide feature dicts and check it.

    Each spec maps ``'sparse'`` to a ``{feature_name: dimension}`` dict, with
    dimensions drawn from ``np.random.randint(1, 10)``, and ``'dense'`` to a
    list of feature names. The deep spec has ``sparse_feature_num`` features
    of each kind; the wide spec has ``wide_feature_num`` of each.
    """
    n_samples = 64

    def _make_spec(tag, count):
        # The sparse dict is filled first so the random draws happen in the
        # same order as feature creation.
        sparse_dims = {}
        for i in range(count):
            sparse_dims["sparse" + tag + str(i)] = np.random.randint(1, 10)
        dense_names = ["dense" + tag + str(i) for i in range(count)]
        return {"sparse": sparse_dims, 'dense': dense_names}

    def _draw_inputs(spec):
        # One integer array per sparse feature, then one float array per
        # dense feature.
        sparse_part = [
            np.random.randint(0, dim, n_samples)
            for dim in spec['sparse'].values()
        ]
        dense_part = [np.random.random(n_samples) for _ in spec['dense']]
        return sparse_part + dense_part

    feature_dim_dict = _make_spec('_', sparse_feature_num)
    wide_feature_dim_dict = _make_spec('wide_', wide_feature_num)

    x = _draw_inputs(feature_dim_dict)
    x_wide = _draw_inputs(wide_feature_dim_dict)
    y = np.random.randint(0, 2, n_samples)

    model = WDL(feature_dim_dict,
                wide_feature_dim_dict,
                hidden_size=[32, 32],
                keep_prob=0.5)
    check_model(model, "WDL", x + x_wide, y)
Esempio n. 6
0
def custom_model():
    """Return a binary-task WDL model over 26 sparse and 13 dense columns.

    Sparse columns are named ``C1``..``C26`` and declared as hashed string
    features with ``vocabulary_size=10000`` and ``embedding_dim=4``. Dense
    columns are named ``I1``..``I13`` and declared with dimension 1. The same
    column list is passed to WDL for both of its feature arguments.
    """
    sparse_columns = []
    for idx in range(1, 27):
        sparse_columns.append(
            SparseFeat(
                "C" + str(idx),
                vocabulary_size=10000,
                embedding_dim=4,
                dtype="string",
                use_hash=True,
            ))

    dense_columns = [DenseFeat("I" + str(idx), 1) for idx in range(1, 14)]

    all_columns = sparse_columns + dense_columns
    return WDL(all_columns, all_columns, task="binary")
Esempio n. 7
0
def find_l2_reg_embedding(linear_feature_columns, dnn_feature_columns,
                          train_model_input, train, test_model_input, test):
    """Train one WDL model per candidate ``l2_reg_embedding`` and tabulate metrics.

    Candidates are read from ``config.param_rand['l2_reg_embedding']``; the
    label column(s) come from the module-level ``target``.

    Args:
        linear_feature_columns: feature columns passed to WDL as its first
            argument.
        dnn_feature_columns: feature columns passed to WDL as its second
            argument.
        train_model_input: model input used for ``model.fit``.
        train: frame indexed with ``train[target]`` to obtain training labels.
        test_model_input: model input used for ``model.predict``.
        test: frame indexed with ``test[target]`` to obtain test labels.

    Returns:
        A DataFrame with one row per candidate and the columns
        ``l2_reg_embedding``, ``RMSE``, ``MAE``, ``MSE`` and ``AUC``, with
        metrics rounded to 3 decimals.
    """
    cols = ['l2_reg_embedding', 'RMSE', 'MAE', 'MSE', 'AUC']
    candidates = config.param_rand['l2_reg_embedding']
    df_result = pd.DataFrame(columns=cols, index=range(len(candidates)))
    for i, x in enumerate(candidates):

        # TODO: make dnn_hidden_units a tunable parameter as well.
        model = WDL(
            linear_feature_columns,
            dnn_feature_columns,
            dnn_hidden_units=(2, 2),
            l2_reg_linear=0.1,
            l2_reg_embedding=x,
            l2_reg_dnn=0,
            init_std=0.0001,
            seed=1024,
            task='binary')

        model.compile("adam", "mse", metrics=['mse'])
        model.fit(
            train_model_input,
            train[target].values,
            batch_size=256,
            epochs=config.model_epoch['epoch'],
            verbose=2,
            validation_split=0.2,
        )
        pred_ans = model.predict(test_model_input, batch_size=256)

        y_true = test[target].values
        mse = mean_squared_error(y_true, pred_ans)

        # Write through a single ``.loc[row, col]`` indexer. The previous
        # ``df_result.loc[i].COL = value`` form assigns to an intermediate
        # Series, which is not guaranteed to update ``df_result``.
        df_result.loc[i, 'l2_reg_embedding'] = x
        df_result.loc[i, 'RMSE'] = np.round(math.sqrt(mse), 3)
        df_result.loc[i, 'MAE'] = np.round(
            mean_absolute_error(y_true, pred_ans), 3)
        df_result.loc[i, 'MSE'] = np.round(mse, 3)
        df_result.loc[i, 'AUC'] = np.round(roc_auc_score(y_true, pred_ans), 3)
    return df_result
Esempio n. 8
0
def widendeep_model(linear_feature_columns, dnn_feature_columns,
                    train_model_input, train, test_model_input, test):
    """Train a WDL model from ``config.widendeep_att``, save it, and report metrics.

    The trained model is written to ``saved_widendeep.h5`` in the working
    directory. The label column(s) come from the module-level ``target``.

    Args:
        linear_feature_columns: feature columns passed to WDL as its first
            argument.
        dnn_feature_columns: feature columns passed to WDL as its second
            argument.
        train_model_input: model input used for ``model.fit``.
        train: frame indexed with ``train[target]`` to obtain training labels.
        test_model_input: model input used for ``model.predict``.
        test: frame indexed with ``test[target]`` to obtain test labels.

    Returns:
        A one-row DataFrame with the columns ``model``, ``RMSE``, ``MAE``,
        ``MSE``, ``AUC`` and ``score``, with metrics rounded to 3 decimals.
        The ``score`` column is created but never assigned here.
    """
    cols = ['model', 'RMSE', 'MAE', 'MSE', 'AUC', 'score']
    df_result = pd.DataFrame(columns=cols, index=range(1))
    model = WDL(
        linear_feature_columns,
        dnn_feature_columns,
        dnn_hidden_units=config.widendeep_att["dnn_hidden_units"],
        #l2_reg_linear=config.widendeep_att["l2_reg_linear"],
        # l2_reg_embedding=config.widendeep_att["l2_reg_embedding"],
        #l2_reg_dnn=config.widendeep_att["l2_reg_dnn"],
        #  init_std=config.widendeep_att["init_std"],
        dnn_dropout=config.widendeep_att['dnn_dropout'],
        dnn_activation=config.widendeep_att['dnn_activation'],
        seed=config.widendeep_att["seed"],
        task=config.widendeep_att["task"])

    model.compile("adam", "mse", metrics=['mse'])

    model.fit(train_model_input,
              train[target].values,
              batch_size=256,
              epochs=config.model_epoch['epoch'],
              verbose=2,
              validation_split=0.2)

    pred_ans = model.predict(test_model_input, batch_size=256)
    save_model(model, 'saved_widendeep.h5')  # persist the trained model

    y_true = test[target].values
    mse = mean_squared_error(y_true, pred_ans)

    # Write through a single ``.loc[row, col]`` indexer. The previous
    # ``df_result.loc[0].COL = value`` form assigns to an intermediate Series,
    # which is not guaranteed to update ``df_result``.
    df_result.loc[0, 'model'] = "Wide and Deep"
    df_result.loc[0, 'RMSE'] = np.round(math.sqrt(mse), 3)
    df_result.loc[0, 'MAE'] = np.round(
        mean_absolute_error(y_true, pred_ans), 3)
    df_result.loc[0, 'MSE'] = np.round(mse, 3)
    df_result.loc[0, 'AUC'] = np.round(roc_auc_score(y_true, pred_ans), 3)

    return df_result
target = ['rating']

# Label-encode each sparse feature column, overwriting it in ``data``.
for feature in sparse_features:
    lbe = LabelEncoder()
    data[feature] = lbe.fit_transform(data[feature])

# One SparseFeat per column, sized by that column's number of distinct values.
fixlen_feature_columns = []
for feature in sparse_features:
    fixlen_feature_columns.append(
        SparseFeat(feature, data[feature].nunique()))
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(
    linear_feature_columns + dnn_feature_columns)

# Split the dataset into training and test sets.
train, test = train_test_split(data, test_size=0.2)
train_model_input = {
    name: train[name].values
    for name in feature_names
}
test_model_input = {
    name: test[name].values
    for name in feature_names
}

# Train with WDL.
model = WDL(
    linear_feature_columns,
    dnn_feature_columns,
    task='regression',
)
model.compile(
    "adam",
    "mse",
    metrics=['mse'],
)
history = model.fit(
    train_model_input,
    train[target].values,
    batch_size=256,
    epochs=100,
    verbose=True,
    validation_split=0.2,
)

# Plot the per-epoch training loss recorded by fit().
plt.figure()
x = range(len(history.history['loss']))
plt.plot(x, history.history['loss'])
plt.title('loss')

# Predict with WDL.
pred_ans = model.predict(test_model_input, batch_size=256)

# Report RMSE, taken as the square root of the MSE rounded to 4 decimals.
mse = round(mean_squared_error(test[target].values, pred_ans), 4)
rmse = mse ** 0.5
print("test RMSE", rmse)
Esempio n. 10
0
    # The same column list is used for both the DNN and the linear inputs.
    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns +
                                      dnn_feature_columns)

    # 3.generate input data for model

    train, test = train_test_split(data, test_size=0.2)
    # Inputs are dicts keyed by feature name; each value is the matching
    # column selected from the split frame.
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}

    # 4.Define Model,train,predict and evaluate
    model = WDL(linear_feature_columns,
                dnn_feature_columns,
                task='binary',
                dnn_hidden_units=(400, 400, 400),
                dnn_dropout=0.5)
    model.compile(
        "adam",
        "binary_crossentropy",
        metrics=['binary_crossentropy'],
    )

    history = model.fit(
        train_model_input,
        train[target].values,
        batch_size=256,
        epochs=10,
        verbose=2,
        validation_split=0.2,
    # One SparseFeat per sparse column, sized by that column's number of
    # distinct values.
    fix_len_feature_columns = [
        SparseFeat(feature, data[feature].nunique())
        for feature in sparse_features
    ]
    # The same column list is used for both the linear and the DNN inputs.
    linear_feature_columns = fix_len_feature_columns
    dnn_feature_columns = fix_len_feature_columns
    feature_names = get_feature_names(linear_feature_columns +
                                      dnn_feature_columns)

    # Split the dataset into training and test sets
    train, test = train_test_split(data, test_size=0.2)
    train_set = {name: train[name].values for name in feature_names}
    test_set = {name: test[name].values for name in feature_names}

    # Train with WDL
    model = WDL(linear_feature_columns, dnn_feature_columns, task='regression')
    model.compile('adam', 'mse', metrics=['mse'])
    history = model.fit(train_set,
                        train[target].values,
                        batch_size=256,
                        epochs=1,
                        verbose=True,
                        validation_split=0.2)

    # Predict with WDL
    pred_ans = model.predict(test_set, batch_size=256)

    # Output RMSE or MSE
    # NOTE: the MSE is rounded to 4 decimals before the square root is taken,
    # so the printed RMSE is the root of the rounded value.
    mse = round(mean_squared_error(test[target].values, pred_ans), 4)
    rmse = mse**0.5
    print(f'test rmse: {rmse}')
Esempio n. 12
0
    # Build the model selected by ``model_type``. The three checks are
    # independent ``if`` statements, so a value matching none of them leaves
    # ``model`` unassigned by this section.
    if model_type == "xDeepFM":
        model = xDeepFM(
            linear_feature_columns,
            dnn_feature_columns,
            task="binary",
            embedding_size=emb_dim,
            dnn_hidden_units=[400, 400],
            cin_layer_size=[200, 200, 200],
        )

    if model_type == "WDL":
        model = WDL(
            linear_feature_columns,
            dnn_feature_columns,
            task="binary",
            embedding_size=emb_dim,
            dnn_hidden_units=[1024, 512, 256],
        )

    # Unlike the two models above, DCN is given only the DNN feature columns.
    if model_type == "DCN":
        model = DCN(
            dnn_feature_columns,
            task="binary",
            embedding_size=emb_dim,
            dnn_hidden_units=[1024, 1024],
            cross_num=6,
        )

    # Binds the Adagrad name itself (it is not called here); presumably it is
    # instantiated further down -- confirm against the rest of the function.
    if opt == "adagrad":
        optimizer = Adagrad
Esempio n. 13
0
    # Encode each sparse column with its own newly created LabelEncoder and
    # write the result back into ``data``.
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    # 2.count #unique features for each sparse field
    sparse_feature_dim = {
        feat: data[feat].nunique()
        for feat in sparse_features
    }
    # 3.generate input data for model
    # One array per sparse feature, in ``sparse_feature_dim`` iteration order.
    model_input = [data[feat].values for feat in sparse_feature_dim]

    if mode == 'train':
        # 4.Define Model,compile and train
        # Only one feature dict is passed here; it has no dense features.
        model = WDL({
            "sparse": sparse_feature_dim,
            "dense": []
        },
                    final_activation='sigmoid')

        model.compile("adam",
                      "binary_crossentropy",
                      metrics=['binary_crossentropy'])

        # Checkpoint filename template; the {epoch}/{loss}/{val_loss}
        # placeholders are presumably filled in by ModelCheckpoint at save
        # time -- confirm against the Keras version in use.
        filepath = 'model_save/wdl_sample-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
        checkpoint = ModelCheckpoint(filepath,
                                     monitor='val_loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min')

        history = model.fit(
Esempio n. 14
0
                        length_name='seq_length')
    ]
behavior_feature_list = ['itemId', 'category']

# The model is chosen by the first command-line argument. Only the *_UDG
# branches read the fifth argument, which they convert to an int and pass as
# ``udg_embedding_size``.
_model_choice = sys.argv[1]
if _model_choice == 'DeepFM_UDG':
    model = DeepFM_UDG(
        linear_feature_columns,
        dnn_feature_columns,
        untrainable_features_columns,
        (200, 80),
        uid_feature_name=udg_features,
        udg_embedding_size=int(sys.argv[5]),
    )
elif _model_choice == 'DeepFM':
    model = DeepFM(
        linear_feature_columns,
        dnn_feature_columns,
        [],
        (200, 80),
    )
elif _model_choice == 'PNN_UDG':
    model = PNN_UDG(
        dnn_feature_columns,
        untrainable_features_columns,
        (200, 80),
        uid_feature_name=udg_features,
        udg_embedding_size=int(sys.argv[5]),
    )
elif _model_choice == 'PNN':
    model = PNN(
        dnn_feature_columns,
        untrainable_features_columns,
        (200, 80),
    )
elif _model_choice == 'WDL':
    model = WDL(
        linear_feature_columns,
        dnn_feature_columns,
        [],
        (200, 80),
    )
elif _model_choice == 'WDL_UDG':
    model = WDL_UDG(
        linear_feature_columns,
        dnn_feature_columns,
        untrainable_features_columns,
        (200, 80),
        uid_feature_name=udg_features,
        udg_embedding_size=int(sys.argv[5]),
    )
elif _model_choice == 'DIEN':
    model = DIEN(
        fixlen_feature_columns,
        behavior_feature_list,
        dnn_hidden_units=[200, 80],
        dnn_dropout=0,
        gru_type="AUGRU",
        use_negsampling=True,
    )
elif _model_choice == 'DIEN_UDG':
    model = DIEN_UDG(
        fixlen_feature_columns,
        untrainable_features_columns,
        behavior_feature_list,
        dnn_hidden_units=[200, 80],
        dnn_dropout=0,
        gru_type="AUGRU",
        use_negsampling=True,
        uid_feature_name=udg_features,
        udg_embedding_size=int(sys.argv[5]),
    )
elif _model_choice == 'DIN':
    model = DIN(
        fixlen_feature_columns,
        behavior_feature_list,
        dnn_hidden_units=[200, 80],
        dnn_dropout=0,
    )
elif _model_choice == 'DIN_UDG':
    model = DIN_UDG(
        fixlen_feature_columns,
        untrainable_features_columns,
        behavior_feature_list,
        dnn_hidden_units=[200, 80],
        dnn_dropout=0,
        uid_feature_name=udg_features,
        udg_embedding_size=int(sys.argv[5]),
    )
    
if sys.argv[4] == 'focal':
    model.compile("adam", loss=focal_loss, metrics=['binary_crossentropy'], )
else:
Esempio n. 15
0
# The 'genres' input is split by row position: the first len(train) rows go
# to the training input and the remaining rows to the test input.
train_model_input['genres'] = genres_list[:len(train), :]
test_model_input['genres'] = genres_list[len(train):, :]
target = ['rating']

# callback
# NOTE: TensorBoard is imported here but not used in the visible lines.
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
callbacks = [EarlyStopping(monitor='val_loss', patience=3, min_delta=1e-2)]

# 6. Build the model
model = WDL(
    linear_feature_columns,
    dnn_feature_columns,
    task='regression',
    l2_reg_linear=1e-5,
    l2_reg_embedding=1e-5,
    l2_reg_dnn=0.01,
    init_std=0.0001,
    dnn_hidden_units=(256, 128),
    seed=1024,
    dnn_dropout=0,
    dnn_activation='relu',
)
model.summary()

# The optimizer settings can be tuned here.
from tensorflow.keras.optimizers import Adam
# The variable name is misspelled ("optmizer"); it is left unchanged because
# the compile call that follows refers to it by this name.
optmizer = Adam(1e-4)
model.compile(
    optmizer,
    "mse",
    metrics=['mse'],