Example #1
def main(sample_num,
         embed_dim,
         learning_rate,
         epochs,
         batch_size,
         mode='max',
         attention_hidden_unit=None):
    """

    :param sample_num: the num of training sample
    :param embed_dim: the dimension of all embedding layer
    :param learning_rate:
    :param epochs:
    :param batch_size:
    :param mode
    :param attention_hidden_unit:
    :return:
    """
    feature_columns, train_X, test_X, train_y, test_y = create_dataset(
        sample_num, embed_dim)

    # ============================Build Model==========================
    model = AFM(feature_columns,
                mode,
                attention_hidden_unit=attention_hidden_unit)
    model.summary()
    # ============================model checkpoint======================
    # check_path = 'save/afm_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    # checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
    #                                                 verbose=1, period=5)
    # =========================Compile============================
    model.compile(loss=binary_crossentropy,
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])
    # ===========================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        # callbacks=[checkpoint],
        batch_size=batch_size,
        validation_split=0.1)
    # ===========================Test==============================
    print('test AUC: %f' % model.evaluate(test_X, test_y)[1])
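A minimal sketch of how this entry point might be invoked; every hyperparameter value below is an illustrative assumption, not taken from the source:

# Hypothetical invocation; the values are illustrative only.
if __name__ == '__main__':
    main(sample_num=100000,
         embed_dim=8,
         learning_rate=0.001,
         epochs=10,
         batch_size=4096,
         mode='max',
         attention_hidden_unit=64)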
Example #2
def run_AFM():
    afm = AFM.AFM(field_size, feature_sizes, batch_size=32 * 8, is_shallow_dropout=False, verbose=True,
                  use_cuda=True, weight_decay=0.00002, use_fm=True, use_ffm=False, n_epochs=num_epoch)
    if not online:
        afm.fit(Xi_train, Xv_train, y_train, Xi_test, Xv_test, y_test, early_stopping=True, refit=True)
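This snippet relies on module-level field_size, feature_sizes, online, and the train/test splits existing elsewhere. A rough sketch of how field_size and feature_sizes are commonly derived from a categorical DataFrame (the file name and column layout are assumptions):

import pandas as pd

# Hypothetical setup for the globals used above; 'train.csv' is an assumption.
df = pd.read_csv('train.csv')
cat_cols = [c for c in df.columns if c != 'label']
field_size = len(cat_cols)                            # number of feature fields
feature_sizes = [df[c].nunique() for c in cat_cols]   # cardinality of each field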
Example #3
    for feat in tqdm(sparse_features):
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])

    # Normalize the continuous features
    for feat in tqdm(dense_features):
        mean = data[feat].mean()
        std = data[feat].std()
        data[feat] = (data[feat] - mean) / (std + 1e-12)
    # print(data.shape)
    # print(data.head())

    train, valid = train_test_split(data, test_size=0.1, random_state=42)
    # print(train.shape)   # (540000, 40)
    # print(valid.shape)   # (60000, 40)
    train_dataset = TensorDataset(torch.LongTensor(train[sparse_features].values),
                                  torch.FloatTensor(train[dense_features].values),
                                  torch.FloatTensor(train['label'].values))
    train_loader = DataLoader(dataset=train_dataset, batch_size=args.train_batch_size, shuffle=True)

    valid_dataset = TensorDataset(torch.LongTensor(valid[sparse_features].values),
                                  torch.FloatTensor(valid[dense_features].values),
                                  torch.FloatTensor(valid['label'].values))
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=args.eval_batch_size, shuffle=False)

    cat_fea_unique = [data[f].nunique() for f in sparse_features]

    model = AFM(cat_fea_unique, num_fea_size=len(dense_features))

    train_model(model)
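train_model is not defined in this excerpt; below is a minimal sketch of what such a loop could look like for this setup. The forward signature, loss choice, learning rate, and epoch count are assumptions:

import torch
import torch.nn as nn

def train_model(model):
    # Hypothetical training loop; names mirror the snippet above.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    criterion = nn.BCELoss()  # assumes the model outputs probabilities
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    for epoch in range(10):  # illustrative epoch count
        model.train()
        for cat_fea, num_fea, label in train_loader:
            cat_fea, num_fea, label = cat_fea.to(device), num_fea.to(device), label.to(device)
            pred = model(cat_fea, num_fea).view(-1)  # assumed forward signature
            loss = criterion(pred, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()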
Example #4
    learning_rate = 0.001
    batch_size = 4096
    epochs = 10

    # ========================== Create dataset =======================
    feature_columns, train, test = create_criteo_dataset(file=file,
                                                         embed_dim=embed_dim,
                                                         read_part=read_part,
                                                         sample_num=sample_num,
                                                         test_size=test_size)
    train_X, train_y = train
    test_X, test_y = test
    # ============================Build Model==========================
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        model = AFM(feature_columns, mode, att_vector, activation, dropout,
                    embed_reg)
        model.summary()
        # =========================Compile============================
        model.compile(loss=binary_crossentropy,
                      optimizer=Adam(learning_rate=learning_rate),
                      metrics=[AUC()])
    # ============================model checkpoint======================
    check_path = 'save/afm_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
                                                    verbose=1, period=5)
    # ===========================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        callbacks=[checkpoint],
        batch_size=batch_size,
        validation_split=0.1)
    # ===========================Test==============================
    print('test AUC: %f' % model.evaluate(test_X, test_y)[1])
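One detail worth noting for the MirroredStrategy setup above: the batch size passed to fit is the global batch, which the strategy splits evenly across replicas. An optional adjustment, shown as a sketch, keeps the per-GPU batch constant as the GPU count changes:

# Optional: keep the per-replica batch constant across GPU counts.
global_batch_size = batch_size * mirrored_strategy.num_replicas_in_sync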
Example #5
'''

from model import AFM
from utils import create_criteo_dataset

import tensorflow as tf
from tensorflow.keras import optimizers, losses, metrics
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    file = 'E:\\PycharmProjects\\推荐算法\\data\\criteo_sample.txt'
    test_size = 0.2
    feature_columns, (X_train, y_train), (X_test, y_test) = \
                        create_criteo_dataset(file, test_size=test_size)

    model = AFM(feature_columns, 'att')
    optimizer = optimizers.SGD(0.01)

    # dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    # dataset = dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)
    #
    # model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    # model.fit(dataset, epochs=100)
    # pre = model.predict(X_test)

    summary = tf.summary.create_file_writer('E:\\PycharmProjects\\tensorboard')
    for i in range(100):
        with tf.GradientTape() as tape:
            pre = model(X_train)
            loss = tf.reduce_mean(losses.binary_crossentropy(y_train, pre))
            print(loss.numpy())
        # The excerpt stops before the update; the gradient step is required for training.
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
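accuracy_score is imported above but never used in the excerpt; a plausible evaluation step, shown as an assumption consistent with the commented-out predict call:

    # Hypothetical evaluation using the accuracy_score import above.
    pre = model(X_test)
    pre = [1 if p > 0.5 else 0 for p in pre.numpy().flatten()]
    print('Accuracy:', accuracy_score(y_test, pre))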
Example #6
        metric_train_auc = []
        metric_test_auc = []
        metric_train_log_loss = []
        metric_test_log_loss = []

        model_list = ['FM', 'DeepFM']

        xi_train, xv_train, y_train = get_deep_fm_data_format(
            train_data_list[i] + '.libsvm', config['field_info'])
        xi_test, xv_test, y_test = get_deep_fm_data_format(
            test_data_list[i] + '.libsvm', config['field_info'])

        # afm
        afm = AFM.AFM(config['field_size'],
                      config['feature_size'],
                      verbose=True,
                      use_cuda=False,
                      weight_decay=0.0001,
                      use_fm=True,
                      use_ffm=False)
        train_auc, train_loss, valid_auc, valid_loss = \
            afm.fit(xi_train, xv_train, y_train, xi_test, xv_test, y_test, early_stopping=True, refit=False)

        logging.info('validating')
        y_pred_afm = afm.predict_proba(xi_test, xv_test)

        # dump the AFM predictions
        with open('afm_result', 'wb') as f:
            pickle.dump(y_pred_afm, f)
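A hedged sketch of how the dumped predictions might feed the metric lists declared at the top of the excerpt; the sklearn calls are standard, but the wiring is an assumption:

        from sklearn.metrics import roc_auc_score, log_loss

        # Hypothetical wiring: score the held-out predictions and record them.
        metric_test_auc.append(roc_auc_score(y_test, y_pred_afm))
        metric_test_log_loss.append(log_loss(y_test, y_pred_afm))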
Example #7
    learning_rate = 0.001
    batch_size = 512
    epochs = 5

    # ========================== Create dataset =======================
    feature_columns, train, test = create_criteo_dataset(file=file,
                                                         embed_dim=embed_dim,
                                                         read_part=read_part,
                                                         sample_num=sample_num,
                                                         test_size=test_size)
    train_X, train_y = train
    test_X, test_y = test
    # ============================Build Model==========================
    model = AFM(feature_columns,
                mode,
                attention_hidden_unit=attention_hidden_unit)
    model.summary()
    # ============================model checkpoint======================
    # check_path = 'save/afm_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    # checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
    #                                                 verbose=1, period=5)
    # =========================Compile============================
    model.compile(loss=binary_crossentropy,
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])
    # ===========================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        # callbacks=[checkpoint],
        batch_size=batch_size,
        validation_split=0.1)
    # ===========================Test==============================
    print('test AUC: %f' % model.evaluate(test_X, test_y)[1])
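The ModelCheckpoint commented out in these examples passes the deprecated period argument. A sketch of the current tf.keras equivalent: save_freq counts batches rather than epochs, so the steps_per_epoch arithmetic below is an assumption about the intent of saving every 5 epochs:

    # Modern equivalent of the commented-out checkpoint: 'period' is deprecated,
    # and save_freq counts batches, so every-5-epochs needs 5 * steps_per_epoch.
    steps_per_epoch = len(train_X) // batch_size  # hypothetical; depends on input format
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        'save/afm_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt',
        save_weights_only=True, verbose=1,
        save_freq=5 * steps_per_epoch)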