Example #1
    def build_model(
        self,
        embedding_dim=4,
        task='binary',
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
        device='cpu',
    ):
        fixlen_feature_columns = [
            SparseFeat(
                feat,
                vocabulary_size=self.vocabulary_size_dict[feat],
                embedding_dim=embedding_dim,
            ) for feat in self.sparse_features
        ]

        if self.variable_length_features:
            varlen_feature_columns = [
                VarLenSparseFeat(
                    SparseFeat(
                        feat,
                        vocabulary_size=self.vocabulary_size_dict[feat],
                        embedding_dim=embedding_dim,
                    ),
                    maxlen=self.variable_length_features_max_len[feat],
                    combiner='mean',
                ) for feat in self.variable_length_features
            ]
        else:
            varlen_feature_columns = []

        linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
        dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns

        model = DeepFM(linear_feature_columns,
                       dnn_feature_columns,
                       task=task,
                       device=device)
        model.compile(optimizer, loss, metrics)
        return model
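
A minimal, self-contained sketch of what build_model does, with illustrative feature names and vocabulary sizes that are not from the source project (the real class keeps this metadata in self.sparse_features, self.vocabulary_size_dict and the variable-length attributes):

from deepctr_torch.inputs import SparseFeat
from deepctr_torch.models import DeepFM

# Illustrative feature metadata (assumed, not from the source project).
sparse_features = ['user_id', 'item_id']
vocabulary_size_dict = {'user_id': 10000, 'item_id': 5000}

fixlen_feature_columns = [
    SparseFeat(feat,
               vocabulary_size=vocabulary_size_dict[feat],
               embedding_dim=4)
    for feat in sparse_features
]

# The same columns feed both the linear/FM part and the DNN part of DeepFM.
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns

model = DeepFM(linear_feature_columns, dnn_feature_columns,
               task='binary', device='cpu')
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])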
Example #2
def test_DeepFM(use_fm, hidden_size, sparse_feature_num):
    model_name = "DeepFM"
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num,
                                          sparse_feature_num)

    model = DeepFM(feature_columns,
                   feature_columns,
                   use_fm=use_fm,
                   dnn_hidden_units=hidden_size,
                   dnn_dropout=0.5)
    check_model(model, model_name, x, y)
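
get_test_data, check_model and SAMPLE_SIZE are assumed to come from the project's own test utilities; in a pytest suite a test like this is usually driven by parametrizing the three arguments. A sketch with an assumed parameter grid (the values in the real suite may differ):

import pytest

# Assumed parameter grid; the actual combinations used upstream may differ.
@pytest.mark.parametrize(
    'use_fm,hidden_size,sparse_feature_num',
    [(True, (32,), 3),
     (False, (32,), 2)],
)
def test_DeepFM_grid(use_fm, hidden_size, sparse_feature_num):
    test_DeepFM(use_fm, hidden_size, sparse_feature_num)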
Example #3
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# 3.generate input data for model
train, test = train_test_split(glowpick, test_size=0.2)
train_model_input = {name: train[name] for name in feature_names}
test_model_input = {name: test[name] for name in feature_names}

device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'

model = DeepFM(linear_feature_columns,
               dnn_feature_columns,
               task='regression',
               device=device)
model.load_state_dict(torch.load(MODEL_PATH))
model.eval()

# epoch 6
from math import sqrt
pred_ans = model.predict(test_model_input, batch_size=256)

print("test MSE", round(mean_squared_error(test[target].values, pred_ans), 4))
print("\ntest RMSE",
      round(sqrt(mean_squared_error(test[target].values, pred_ans)), 4))
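
The snippet restores weights with load_state_dict and only runs inference; a checkpoint like the one at MODEL_PATH is typically written beforehand with torch.save. A sketch of that earlier step, reusing the snippet's own names and the regression hyperparameters seen in the other examples:

# Train once, then persist the weights that the snippet above reloads.
model.compile("adam", "mse", metrics=['mse'])
model.fit(train_model_input, train[target].values,
          batch_size=256, epochs=10, verbose=2, validation_split=0.2)
torch.save(model.state_dict(), MODEL_PATH)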

use_col = [
    'created_at', 'rating', 'origin_user_id', 'origin_product_id',
    'origin_age', 'origin_gender', 'price', 'brandName',
]
Example #4
    feature_names = get_feature_names(linear_feature_columns +
                                      dnn_feature_columns)

    # 3.generate input data for model
    train, test = train_test_split(data, test_size=0.2)
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}
    # 4.Define Model,train,predict and evaluate

    device = 'cpu'
    use_cuda = True
    if use_cuda and torch.cuda.is_available():
        print('cuda ready...')
        device = 'cuda:0'

    model = DeepFM(linear_feature_columns,
                   dnn_feature_columns,
                   task='regression',
                   device=device)
    model.compile(
        "adam",
        "mse",
        metrics=['mse'],
    )

    history = model.fit(
        train_model_input,
        train[target].values,
        batch_size=256,
        epochs=10,
        verbose=2,
        validation_split=0.2,
    )
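
The example stops after fitting; the evaluation step that usually follows (as in Example #3) scores the held-out split with predict and a regression metric. A sketch of that continuation inside the same function, assuming target is the label column used above:

    # Hedged continuation: score the held-out split, mirroring Example #3.
    from sklearn.metrics import mean_squared_error

    pred_ans = model.predict(test_model_input, batch_size=256)
    print("test MSE", round(mean_squared_error(test[target].values, pred_ans), 4))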
Example #5
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
train, test = train_test_split(data, test_size=0.2)
train_model_input = {name: train[name] for name in feature_names}

test_model_input = {name: test[name] for name in feature_names}

device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'

model = DeepFM(linear_feature_columns=linear_feature_columns,
               dnn_feature_columns=dnn_feature_columns,
               task='binary',
               l2_reg_embedding=1e-5,
               device=device)

model.compile(
    "adagrad",
    "binary_crossentropy",
    metrics=["binary_crossentropy", "auc"],
)
model.fit(train_model_input,
          train[target].values,
          batch_size=32,
          epochs=10,
          validation_split=0.0,
          verbose=2)
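
A natural follow-up for this binary task is to score the test split with the same metrics passed to compile. A sketch using scikit-learn, assuming target is the binary label column prepared earlier:

from sklearn.metrics import log_loss, roc_auc_score

pred_ans = model.predict(test_model_input, batch_size=256)
print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))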
Example #6
]
varlen_feature_columns = [
    VarLenSparseFeat('genres',
                     len(key2index) + 1, max_len, 'mean')
]  # Notice : value 0 is for padding for sequence input feature

linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# 3.generate input data for model
model_input = {name: data[name] for name in feature_names}
model_input['genres'] = genres_list

# 4.Define Model,compile and train
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')

model.compile(
    "adam",
    "mse",
    metrics=['mse'],
)
history = model.fit(
    model_input,
    data[target].values,
    batch_size=256,
    epochs=10,
    verbose=2,
    validation_split=0.2,
)
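
The snippet assumes genres_list, key2index and max_len were built by encoding the multi-valued genres column into padded integer sequences, with 0 reserved for padding as the comment notes. A sketch of that preprocessing, mirroring the snippet's names (the exact source code may differ):

import numpy as np

key2index = {}

def encode_genres(value):
    # Map each '|'-separated genre to an integer id starting at 1 (0 = padding).
    keys = value.split('|')
    for key in keys:
        if key not in key2index:
            key2index[key] = len(key2index) + 1
    return [key2index[key] for key in keys]

encoded = [encode_genres(v) for v in data['genres'].values]
max_len = max(len(seq) for seq in encoded)

# Right-pad every sequence with zeros to a common length.
genres_list = np.zeros((len(encoded), max_len), dtype='int64')
for i, seq in enumerate(encoded):
    genres_list[i, :len(seq)] = seq

Note that the positional VarLenSparseFeat('genres', len(key2index) + 1, max_len, 'mean') call matches an older deepctr-torch signature; recent releases instead wrap a SparseFeat, as shown in Example #1.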
Example #7
    feature_names = get_feature_names(linear_feature_columns +
                                      dnn_feature_columns)

    train, test = train_test_split(data, test_size=0.2, random_state=666)
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}

    device = 'cpu'
    use_cuda = True
    if use_cuda and torch.cuda.is_available():
        print('cuda ready...')
        device = 'cuda:0'

    model = DeepFM(linear_feature_columns,
                   dnn_feature_columns,
                   task='binary',
                   device=device)
    model.compile(
        Adam(model.parameters(), lr),
        "binary_crossentropy",
        metrics=['binary_crossentropy', 'auc'],
    )

    history = model.fit(train_model_input,
                        train[target].values,
                        batch_size=64,
                        epochs=epoch,
                        verbose=2,
                        validation_split=0.2)
    pred_ans = model.predict(test_model_input, batch_size=64)
    print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))