Example #1
def build_model(features_dict, vocabularies):
    model = DCN(
        features_dict=features_dict,
        vocabularies=vocabularies,
        num_cross_layers=FLAGS.num_cross_layers,
        num_deep_layers=FLAGS.num_deep_layers,
        deep_layer_size=FLAGS.deep_layer_size,
        model_structure=FLAGS.model_structure,
        embedding_dim=FLAGS.embedding_dim,
        projection_dim=FLAGS.projection_dim,
        l2_penalty=FLAGS.l2_penalty,
    )
    return model
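The cross network that num_cross_layers configures applies the DCN crossing rule x_{l+1} = x_0 * (W_l x_l + b_l) + x_l (with projection_dim optionally factorizing W_l into a low-rank product). Below is a minimal Keras sketch of one such layer; the class name CrossLayer and its weight shapes are illustrative, not part of the model above:

import tensorflow as tf

class CrossLayer(tf.keras.layers.Layer):
    # One explicit feature cross: x_{l+1} = x_0 * (W x_l + b) + x_l
    def build(self, input_shape):
        dim = int(input_shape[-1])
        self.w = self.add_weight(name='w', shape=(dim, dim), initializer='glorot_uniform')
        self.b = self.add_weight(name='b', shape=(dim,), initializer='zeros')

    def call(self, x0, xl):
        return x0 * (tf.matmul(xl, self.w) + self.b) + xl

# Stacking num_cross_layers of these, always crossing against the original input x0:
x0 = tf.random.normal([32, 16])
x = x0
for _ in range(3):
    x = CrossLayer()(x0, x)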
Example #2
def main(learning_rate, epochs, hidden_units):
    """
    feature_columns is a list and contains two dict:
    - dense_features: {feat: dense_feature_name}
    - sparse_features: {feat: sparse_feature_name, feat_num: the number of this feature,
    embed_dim: the embedding dimension of this feature }
    train_X: [dense_train_X, sparse_train_X]
    test_X: [dense_test_X, sparse_test_X]
    """
    feature_columns, train_X, test_X, train_y, test_y = create_dataset()

    # ============================Build Model==========================
    model = DCN(feature_columns, hidden_units)
    model.summary()
    # =============================Tensorboard=========================
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = 'logs/' + current_time
    tensorboard = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir,
        histogram_freq=1,
        write_graph=True,
        write_images=True,
        embeddings_freq=0,
        update_freq=500  # log metrics every 500 batches
    )
    # ============================model checkpoint======================
    check_path = 'save/dcn_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    # saves weights every 4 epochs; 'period' is deprecated in newer TF releases in favor of save_freq
    checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
                                                    verbose=1, period=4)
    # =========================Compile============================
    model.compile(loss=binary_crossentropy, optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])
    # ===========================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        callbacks=[tensorboard, checkpoint],
        batch_size=128,
        validation_split=0.2
    )
    # ===========================Test==============================
    print('test AUC: %f' % model.evaluate(test_X, test_y)[1])
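Because the checkpoint above stores weights only, a later evaluation run has to rebuild the model and restore the newest checkpoint first; a minimal sketch, assuming the same save/ directory:

    # Restore the most recent weights-only checkpoint, then evaluate.
    latest = tf.train.latest_checkpoint('save')
    if latest is not None:
        model.load_weights(latest)
    print('restored test AUC: %f' % model.evaluate(test_X, test_y)[1])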
Example #3
    dnn_dropout = 0.5
    hidden_units = [256, 128, 64]

    learning_rate = 0.001
    batch_size = 4096
    epochs = 10
    # ========================== Create dataset =======================
    feature_columns, train, test = create_criteo_dataset(file=file,
                                                         embed_dim=embed_dim,
                                                         read_part=read_part,
                                                         sample_num=sample_num,
                                                         test_size=test_size)
    train_X, train_y = train
    test_X, test_y = test
    # ============================Build Model==========================
    model = DCN(feature_columns, hidden_units, dnn_dropout=dnn_dropout)
    model.summary()
    # ============================model checkpoint======================
    # check_path = 'save/dcn_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    # checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
    #                                                 verbose=1, period=5)
    # =========================Compile============================
    model.compile(loss=binary_crossentropy,
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])
    # ===========================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1
    )
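create_criteo_dataset is defined elsewhere in the repository; the sketch below only illustrates the contract implied by the docstring in Example #2 (dense columns I1..I13 and sparse columns C1..C26, per the standard Criteo layout) and is not the repository's actual implementation:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

def create_criteo_dataset_sketch(file, embed_dim=8, read_part=True,
                                 sample_num=100000, test_size=0.2):
    dense_features = ['I' + str(i) for i in range(1, 14)]
    sparse_features = ['C' + str(i) for i in range(1, 27)]
    data = pd.read_csv(file, sep='\t', nrows=sample_num if read_part else None,
                       names=['label'] + dense_features + sparse_features)
    data[dense_features] = data[dense_features].fillna(0)
    data[sparse_features] = data[sparse_features].fillna('-1')
    for feat in sparse_features:
        data[feat] = LabelEncoder().fit_transform(data[feat])
    data[dense_features] = MinMaxScaler().fit_transform(data[dense_features])
    # Two groups of descriptors, matching the docstring in Example #2.
    feature_columns = [
        [{'feat': f} for f in dense_features],
        [{'feat': f, 'feat_num': int(data[f].max()) + 1, 'embed_dim': embed_dim}
         for f in sparse_features],
    ]
    train, test = train_test_split(data, test_size=test_size)
    train_X = [train[dense_features].values, train[sparse_features].values]
    test_X = [test[dense_features].values, test[sparse_features].values]
    return feature_columns, (train_X, train['label'].values), \
           (test_X, test['label'].values)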
Example #4
def run_DCN():
    dcn = DCN.DCN(field_size, feature_sizes, batch_size=32 * 8, verbose=True, use_cuda=True,
                  weight_decay=0.00002, use_inner_product=True, n_epochs=num_epoch)
    if not online:
        # 'ealry_stopping' is spelled this way in this DCN implementation's fit() signature
        dcn.fit(Xi_train, Xv_train, y_train, Xi_test, Xv_test, y_test,
                ealry_stopping=True, refit=True)
Example #5
import tensorflow as tf
from tensorflow.keras import losses, optimizers
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    file = 'E:\\PycharmProjects\\推荐算法\\data\\train.txt'
    test_size = 0.4
    hidden_units = [256, 128, 64]

    feature_columns, (X_train, y_train), (X_test,
                                          y_test) = create_criteo_dataset(
                                              file, test_size=test_size)

    model = DCN(feature_columns,
                hidden_units,
                1,
                activation='relu',
                layer_num=6)
    optimizer = optimizers.SGD(0.01)

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    train_dataset = train_dataset.batch(32).prefetch(
        tf.data.experimental.AUTOTUNE)

    # model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])
    # model.fit(train_dataset, epochs=100)
    # logloss, auc = model.evaluate(X_test, y_test)
    # print('logloss {}\nAUC {}'.format(round(logloss,2), round(auc,2)))
    # model.summary()

    summary_writer = tf.summary.create_file_writer('logs/')  # assumed log directory
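The script stops after creating the summary writer; a custom loop consistent with the objects already built (model, optimizer, train_dataset, summary_writer) would look roughly like this sketch, where the loss wiring is an assumption:

    loss_fn = losses.BinaryCrossentropy()
    global_step = 0
    for epoch in range(100):
        for X_batch, y_batch in train_dataset:
            with tf.GradientTape() as tape:
                y_pred = model(X_batch, training=True)
                loss = loss_fn(y_batch, y_pred)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            with summary_writer.as_default():
                tf.summary.scalar('train/loss', loss, step=global_step)
            global_step += 1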
Example #6
    dnn_dropout = 0.5
    hidden_units = [256, 128, 64]

    learning_rate = 0.001
    batch_size = 512
    epochs = 5
    # ========================== Create dataset =======================
    feature_columns, train, test = create_criteo_dataset(file=file,
                                                         embed_dim=embed_dim,
                                                         read_part=read_part,
                                                         sample_num=sample_num,
                                                         test_size=test_size)
    train_X, train_y = train
    test_X, test_y = test
    # ============================Build Model==========================
    model = DCN(feature_columns, hidden_units, dnn_dropout)
    model.summary()
    # ============================model checkpoint======================
    # check_path = 'save/dcn_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    # checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
    #                                                 verbose=1, period=5)
    # =========================Compile============================
    model.compile(loss=binary_crossentropy,
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])
    # ===========================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        # callbacks=[tensorboard, checkpoint],
        batch_size=batch_size,
        validation_split=0.1
    )
Example #7
    for feat in dense_features:  # standardize each dense (numeric) feature
        mean = data[feat].mean()
        std = data[feat].std()
        data[feat] = (data[feat] - mean) / (std + 1e-12)
    # print(data.shape)
    # print(data.head())

    train, valid = train_test_split(data, test_size=0.1, random_state=42)
    # print(train.shape)   # (540000, 40)
    # print(valid.shape)   # (60000, 40)
    train_dataset = TensorDataset(
        torch.LongTensor(train[sparse_features].values),
        torch.FloatTensor(train[dense_features].values),
        torch.FloatTensor(train['label'].values))
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.train_batch_size,
                              shuffle=True)

    valid_dataset = TensorDataset(
        torch.LongTensor(valid[sparse_features].values),
        torch.FloatTensor(valid[dense_features].values),
        torch.FloatTensor(valid['label'].values))
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=args.eval_batch_size,
                              shuffle=False)

    cat_fea_unique = [data[f].nunique() for f in sparse_features]

    model = DCN(cat_fea_unique, num_fea_size=len(dense_features))

    train_model(model)
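train_model is defined elsewhere in that script; below is a minimal sketch of such a loop over the loaders built above, where the optimizer choice, learning rate, and the model(cat_fea, num_fea) call signature are assumptions:

import torch
import torch.nn as nn
import torch.optim as optim

def train_model_sketch(model, epochs=5, lr=1e-3):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    criterion = nn.BCEWithLogitsLoss()  # assumes the model returns raw logits
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        for cat_fea, num_fea, label in train_loader:
            cat_fea, num_fea = cat_fea.to(device), num_fea.to(device)
            label = label.to(device)
            logits = model(cat_fea, num_fea).squeeze(-1)
            loss = criterion(logits, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('epoch %d: last batch loss %.4f' % (epoch, loss.item()))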