Example #1
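    # k: dimension of the FM latent (pairwise-interaction) factor vectors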
    k = 10

    learning_rate = 0.001
    batch_size = 4096
    epochs = 10
    # ========================== Create dataset =======================
    feature_columns, train, test = create_criteo_dataset(file=file,
                                                         read_part=read_part,
                                                         sample_num=sample_num,
                                                         test_size=test_size)
    train_X, train_y = train
    test_X, test_y = test
    # ============================Build Model==========================
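    # build and compile inside the MirroredStrategy scope so model variables are replicated across all visible GPUs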
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        model = FM(feature_columns=feature_columns, k=k)
        model.summary()
        # ============================Compile============================
        model.compile(loss=binary_crossentropy,
                      optimizer=Adam(learning_rate=learning_rate),
                      metrics=[AUC()])
    # ============================model checkpoint======================
    # check_path = '../save/fm_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    # checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
    #                                                 verbose=1, period=5)
    # ==============================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        callbacks=[],  # e.g. [checkpoint] once the ModelCheckpoint above is enabled
        batch_size=batch_size,
        validation_split=0.1
    )
Example #2
    k = 10

    learning_rate = 0.001
    batch_size = 512
    epochs = 5

    # ========================== Create dataset =======================
    feature_columns, train, test = create_criteo_dataset(file=file,
                                                         read_part=read_part,
                                                         sample_num=sample_num,
                                                         test_size=test_size)
    train_X, train_y = train
    test_X, test_y = test
    # ============================Build Model==========================
    model = FM(feature_columns=feature_columns, k=k)
    # ============================model checkpoint======================
    # check_path = '../save/fm_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    # checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
    #                                                 verbose=1, period=5)
    # ============================Compile============================
    model.compile(loss=binary_crossentropy, optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])
    # ==============================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        # callbacks=[checkpoint],
        batch_size=batch_size,
        validation_split=0.1
    )
Example #3
import argparse
parser = argparse.ArgumentParser(description='command-line arguments')
parser.add_argument('-k', type=int, help='v_dim', default=8)
parser.add_argument('-w_reg', type=float, help='w regularization', default=1e-4)
parser.add_argument('-v_reg', type=float, help='v regularization', default=1e-4)
args = parser.parse_args()

if __name__ == '__main__':
    file_path = 'train.txt'
    (X_train, y_train), (X_test, y_test) = create_criteo_dataset(file_path, test_size=0.5)

    k = args.k
    w_reg = args.w_reg
    v_reg = args.v_reg

    model = FM(k, w_reg, v_reg)
    optimizer = optimizers.SGD(0.01)
    # train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    # train_dataset = train_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)
    # model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['accuracy'])
    # model.fit(train_dataset, epochs=200)
    # print(model.evaluate(X_test, y_test))
    # model.summary()

    summary_writer = tf.summary.create_file_writer('E:\\PycharmProjects\\tensorboard')
    for i in range(100):
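        # record the forward pass on the tape so gradients of the BCE loss can be taken below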
        with tf.GradientTape() as tape:
            y_pre = model(X_train)
            loss = tf.reduce_mean(losses.binary_crossentropy(y_true=y_train, y_pred=y_pre))
            print(loss.numpy())
        with summary_writer.as_default():
            tf.summary.scalar('loss', loss, step=i)
        # eager-mode update: take gradients from the tape and apply them with SGD
        grad = tape.gradient(loss, model.variables)
        optimizer.apply_gradients(grads_and_vars=zip(grad, model.variables))
Example #4
    train_dataset = train_dataset.shuffle(buffer_size=100)
    train_dataset = train_dataset.batch(batch_size)

    X_test = tf.data.Dataset.from_tensor_slices(X_test)
    y_test = tf.data.Dataset.from_tensor_slices(y_test)

    test_dataset = tf.data.Dataset.zip((X_test, y_test))
    test_dataset = test_dataset.batch(batch_size)

    # Directory where the checkpoints will be saved
    checkpoint_dir = './training_checkpoints'

    # Name of the checkpoint files
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{step}")

    model = FM(field_dims=field_dims, embedding_dim=embedding_dim)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
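    # from_logits=False: the FM model is expected to output probabilities (post-sigmoid), not raw logits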
    loss_fn = tf.losses.BinaryCrossentropy(from_logits=False)

    train_loss_results = []
    train_accuracy_results = []

    # train the model
    best_test_acc = 0
    for epoch in range(num_epochs):
        epoch_loss = tf.keras.metrics.Mean()
        epoch_accuracy = tf.keras.metrics.BinaryAccuracy()

        for nb, (X, y) in enumerate(train_dataset):
            y_hat, loss = train_step(X, y)
            # update metrics
            epoch_loss.update_state(loss)
            epoch_accuracy.update_state(y, y_hat)

        train_loss_results.append(epoch_loss.result())
        train_accuracy_results.append(epoch_accuracy.result())
Example #5
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='FM')
    parser.add_argument('--test_size', type=float, default=0.2)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--embed_dim', type=int, default=10)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--lr', type=float, default=0.002)
    parser.add_argument('--file', type=str, default='./data/criteo_sampled_data.csv')
    args = parser.parse_args()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # =============================================Data=================================================================
    data, feat_columns, dense_feats, sparse_feats = create_dataset(file=args.file, embed_dim=args.embed_dim)
    train, valid = train_test_split(data, test_size=args.test_size)
    train_dataset = Data.TensorDataset(torch.LongTensor(train[sparse_feats].values),
                                       torch.FloatTensor(train[dense_feats].values),
                                       torch.FloatTensor(train['label'].values))
    train_loader = Data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
    valid_dataset = Data.TensorDataset(torch.LongTensor(valid[sparse_feats].values),
                                       torch.FloatTensor(valid[dense_feats].values),
                                       torch.FloatTensor(valid['label'].values))
    valid_loader = Data.DataLoader(dataset=valid_dataset, batch_size=args.batch_size, shuffle=False)
    # =============================================Model================================================================

    dense_feat_columns, sparse_feat_columns = feat_columns
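    # total input dimension: every dense feature plus the vocabulary size of each sparse field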
    N = len(dense_feat_columns) + sum(feat['feat_num'] for feat in sparse_feat_columns)
    model = FM(N, args.embed_dim)
    model.to(device)
    # =============================================Train================================================================
    training(model, sparse_feat_columns, train_loader, valid_loader, args.batch_size, args.lr, args.epochs, device)
Example #6
sample_num = 100000
test_size = 0.2

k = 32

learning_rate = 0.001
batch_size = 500
epochs = 100

feature_columns, train, test, val = create_criteo_dataset(
    file=file, read_part=read_part, sample_num=sample_num, test_size=test_size)
train_X, train_y = train
test_X, test_y = test
val_X, val_y = val

model = FM(feature_columns=feature_columns, k=k)
model.summary()

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_auc',
                                                  verbose=1,
                                                  patience=10,
                                                  mode='max',
                                                  restore_best_weights=True)

model.compile(loss=tf.keras.losses.binary_crossentropy,
              optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              metrics=[tf.keras.metrics.AUC()])

model.fit(
    train_X,
    train_y,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(val_X, val_y),
    callbacks=[early_stopping]
)
Example #7
logging.info('df_train.shape ' + str(df_train.shape))
logging.info('train_labels.shape ' + str(train_labels.shape))

# feature length
feature_length = df_train.shape[1]
hp.feature_length = feature_length

# number of samples
train_num = df_train.shape[0]

# data generator
batch_gen = batch_generator([df_train.values, train_labels], hp.batch_size)

# initialize FM model
logging.info('initialize FM model')
fm_model = FM(hp)
fm_model.build_graph()

# begin session
logging.info('# Session')
saver = tf.train.Saver(max_to_keep=hp.max_to_keep)
with tf.Session() as sess:
    # restore from the latest checkpoint if one exists
    ckpt = tf.train.latest_checkpoint(hp.logdir)
    if ckpt is None:
        logging.info('initialize fresh parameters for the fm model')
        sess.run(tf.global_variables_initializer())
    else:
        saver.restore(sess, ckpt)

    # merge all the summaries and write them out to train_logs (reusing hp.logdir here)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(hp.logdir, sess.graph)
Example #8
from model import FM, DNN
from utils import create_criteo_dataset

import tensorflow as tf
from tensorflow.keras import optimizers, losses, metrics
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    file_path = 'E:\\PycharmProjects\\推荐算法\\data\\criteo_sample.txt'
    (X_train, y_train), (X_test, y_test) = create_criteo_dataset(file_path,
                                                                 test_size=0.5)
    k = 8

    #**************** Statement 1 of Training *****************#
    model = FM(k)
    optimizer = optimizers.SGD(0.01)

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
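    # batch into mini-batches of 32 and prefetch so input preparation overlaps the training step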
    train_dataset = train_dataset.batch(32).prefetch(
        tf.data.experimental.AUTOTUNE)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(train_dataset, epochs=200)

    # evaluation: threshold the predicted probabilities at 0.5 and score with accuracy_score
    fm_pre = model(X_test)
    fm_pre = [1 if x > 0.5 else 0 for x in fm_pre]
    print('FM accuracy:', accuracy_score(y_test, fm_pre))

    #**************** Statement 2 of Training *****************#
Example #9
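        # z-score each dense feature; the 1e-12 epsilon guards against zero variance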
        mean = data[feat].mean()
        std = data[feat].std()
        data[feat] = (data[feat] - mean) / (std + 1e-12)
    # print(data.shape)
    # print(data.head())

    train, valid = train_test_split(data, test_size=0.1, random_state=42)
    # print(train.shape)   # (540000, 40)
    # print(valid.shape)   # (60000, 40)
    train_dataset = TensorDataset(
        torch.LongTensor(train[sparse_features].values),
        torch.FloatTensor(train[dense_features].values),
        torch.FloatTensor(train['label'].values))
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.train_batch_size,
                              shuffle=True)

    valid_dataset = TensorDataset(
        torch.LongTensor(valid[sparse_features].values),
        torch.FloatTensor(valid[dense_features].values),
        torch.FloatTensor(valid['label'].values))
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=args.eval_batch_size,
                              shuffle=False)

    cat_fea_unique = [data[f].nunique() for f in sparse_features]

    model = FM(cat_fea_unique, num_fea_size=len(dense_features))

    train_model(model)