Example #1

import glob

import numpy as np
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint, ProgbarLogger
from deepctr.models import DeepFM

# DataGenerator, get_total_examples and auroc are project-local helpers
# (a sketch of the latter two follows this example).

def main(dataPath, dataPath_val, batch_size):

    # Training files: take every fifth CSV to subsample the dataset
    files = glob.glob(dataPath + "/*.csv")[::5]

    # Validation files, subsampled the same way
    files_val = glob.glob(dataPath_val + "/*.csv")[::5]

    # Count number of examples in training data
    nexs = get_total_examples(files)
    print("Number of training examples: ", nexs)

    nexs_val = get_total_examples(files_val)
    print("Number of validation examples: ", nexs_val)

    # Create data generator
    train_gen = DataGenerator(files, nexs, batch_size=batch_size)
    val_gen = DataGenerator(files_val, nexs_val, batch_size=batch_size)

    linear_feature_columns = train_gen.linear_feature_columns
    dnn_feature_columns = train_gen.dnn_feature_columns

    # Define the model, then train and evaluate
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    optimizer = keras.optimizers.Adam(learning_rate=0.001,
                                      beta_1=0.9,
                                      beta_2=0.999)
    model.compile(
        optimizer,
        "binary_crossentropy",
        metrics=['binary_crossentropy', auroc],
    )

    pbar = ProgbarLogger(count_mode='steps', stateful_metrics=None)

    weights_file = "model-5-lr0p001.h5"
    model_checkpoint = ModelCheckpoint(weights_file,
                                       monitor="val_binary_crossentropy",
                                       save_best_only=True,
                                       save_weights_only=True,
                                       verbose=1)

    history = model.fit_generator(train_gen,
                                  epochs=10,
                                  verbose=1,
                                  steps_per_epoch=int(np.ceil(nexs / batch_size)),
                                  validation_data=val_gen,
                                  validation_steps=int(np.ceil(nexs_val / batch_size)),
                                  callbacks=[model_checkpoint, pbar])
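
The snippet above calls two helpers that this page does not show: get_total_examples and the auroc metric (DataGenerator is a keras.utils.Sequence over the CSV files). A minimal sketch of plausible implementations, assuming one header row per CSV and a binary label; the bodies here are assumptions, not the originals:

import numpy as np
import tensorflow as tf
from sklearn.metrics import roc_auc_score

def get_total_examples(files):
    # Count data rows across the CSVs, assuming one header line per file.
    total = 0
    for path in files:
        with open(path) as f:
            total += max(sum(1 for _ in f) - 1, 0)
    return total

def auroc(y_true, y_pred):
    # Per-batch ROC AUC as a Keras metric, via the common py_function wrapper.
    def _auc(t, p):
        return np.float32(roc_auc_score(t, p))
    return tf.py_function(_auc, (y_true, y_pred), tf.float32)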

Example #2

    # (this snippet starts inside a train/test driver; `mode`, `model`,
    # `num_train` and `num_valid` are defined earlier in the original source)
    if mode == 'train':
        # Save the best weights by validation loss; the checkpoint filename
        # pattern is inferred from the weight files loaded in the 'test'
        # branch below.
        checkpoint = ModelCheckpoint(
            'model_save/deep_fm_fn-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
            monitor='val_loss',
            verbose=1,
            save_best_only=True,
            mode='min',
            save_weights_only=True)

        # Earlier run against the small sample split:
        # history = model.fit_generator(
        #     generate_arrays_from_file('./data/sample/feature_mapped_valid.data', batch_size=batch_size),
        #     steps_per_epoch=int(np.ceil(num_train / batch_size)),
        #     callbacks=[checkpoint], epochs=50, verbose=1,
        #     validation_data=generate_arrays_from_file('./data/sample/feature_mapped_valid.data', batch_size=batch_size),
        #     validation_steps=int(np.ceil(num_valid / batch_size)))
        history = model.fit_generator(
            generate_arrays_from_file(
                './data/feature_mapped_combined_train.data',
                batch_size=batch_size),
            steps_per_epoch=int(np.ceil(num_train / batch_size)),
            callbacks=[checkpoint],
            epochs=50,
            verbose=1,
            validation_data=generate_arrays_from_file(
                './data/feature_mapped_combined_valid.data',
                batch_size=batch_size),
            validation_steps=int(np.ceil(num_valid / batch_size)))

    elif mode == 'test':
        # model.load_weights('model_save/deep_fm_fn-ep002-loss0.148-val_loss0.174.h5')  # auc: 0.718467 batch_size=6000
        #model.load_weights('model_save/deep_fm_fn-ep001-loss0.149-val_loss0.175.h5')  # auc: 0.714243  batch_size = 2048
        # model.load_weights('model_save/deep_fm_fn-ep005-loss0.147-val_loss0.173.h5')  # auc: 0.722535  batch_size = 10000
        # model.load_weights('model_save/deep_fm_fn_bs10000-ep001-loss0.155-val_loss0.153.h5')  # auc: 0.738023
        #model.load_weights('model_save/deep_fm_fn_bs15000-ep001-loss0.156-val_loss0.152.h5')  # auc: 0.739935
        #model.load_weights('model_save/deep_fm_fn-ep002-loss0.154-val_loss0.154-bs15000-ee20-hz[128, 128].h5')  # auc: 0.741590
        model.load_weights(
            'model_save/deep_fm_fn-ep020-loss0.153-val_loss0.153-bs15000-ee20-hz[5, 600].h5')
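
generate_arrays_from_file is also not shown on this page. A sketch of the usual Keras generator pattern it presumably follows, assuming one example per line with the label in the first whitespace-separated column (the actual file format is a guess):

import numpy as np

def generate_arrays_from_file(path, batch_size):
    # Loop forever: Keras generators are re-read across epochs.
    while True:
        X, y = [], []
        with open(path) as f:
            for line in f:
                fields = line.split()
                y.append(float(fields[0]))
                X.append([float(v) for v in fields[1:]])
                if len(X) == batch_size:
                    yield np.asarray(X), np.asarray(y)
                    X, y = [], []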
Example #3

def get_dataset(data_path=train_data_path):
    # (function body truncated in the original; it builds a batched
    #  tf.data.Dataset of ({feature_name: tensor}, label) pairs from
    #  data_path. The `train_data_path` default and the signature are
    #  guesses; a fuller sketch appears after this example.)
    ...
    return dataset


linear_feature_columns = varlen_feature_columns + fixed_feature_columns
dnn_feature_columns = varlen_feature_columns + fixed_feature_columns
callbacks = []
GPU = True
if GPU:
    strategy = tf.distribute.MirroredStrategy(devices=['/gpu:0', '/gpu:1', '/gpu:2', '/gpu:3'])
    # strategy = tf.distribute.MirroredStrategy(devices=['/gpu:3'])
    with strategy.scope():
        model = DeepFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=[1024, 512, 256],
                       task='binary',
                       dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False)
        model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy', tf.keras.metrics.AUC()])
    # model.run_eagerly = True
    # fit_generator is deprecated in TF2, and get_dataset() returns a
    # tf.data.Dataset, which Model.fit consumes directly (queue/worker
    # arguments are ignored for Dataset inputs).
    model.fit(get_dataset(), epochs=10, verbose=2, callbacks=callbacks,
              validation_data=get_dataset(eval_data_path), validation_freq=1,
              initial_epoch=0)
    tf.saved_model.save(model, "./models")
else:
    model = DeepFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=[1024, 512, 256], task='binary',
                   dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False)
    model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy', tf.keras.metrics.AUC()])
    model.run_eagerly = True
    # Same as the GPU path: Model.fit consumes the Dataset directly.
    model.fit(get_dataset(), epochs=10, verbose=2, callbacks=callbacks,
              validation_data=get_dataset(eval_data_path), validation_freq=1,
              initial_epoch=0)
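
Example #3 leaves the feature-column lists, the data paths, and get_dataset undefined. A self-contained sketch under those assumptions: SparseFeat and VarLenSparseFeat are deepctr's real feature-column API, but the field names, vocabulary sizes, paths, and TFRecord schema below are all illustrative, not taken from the original source.

import tensorflow as tf
from deepctr.feature_column import SparseFeat, VarLenSparseFeat

# Hypothetical fields; the real names, vocabularies and dims aren't shown.
fixed_feature_columns = [
    SparseFeat('user_id', vocabulary_size=100000, embedding_dim=16),
    SparseFeat('item_id', vocabulary_size=500000, embedding_dim=16),
]
varlen_feature_columns = [
    VarLenSparseFeat(
        SparseFeat('hist_item_ids', vocabulary_size=500000, embedding_dim=16),
        maxlen=50, combiner='mean'),
]

train_data_path = './data/train-*.tfrecord'   # assumed locations
eval_data_path = './data/eval-*.tfrecord'

def get_dataset(data_path=train_data_path, batch_size=4096):
    # Parse TFRecords into ({feature_name: tensor}, label) batches; DeepFM's
    # Keras inputs are named after the feature columns, so a dict feeds it.
    feature_spec = {
        'user_id': tf.io.FixedLenFeature([1], tf.int64),
        'item_id': tf.io.FixedLenFeature([1], tf.int64),
        'hist_item_ids': tf.io.FixedLenFeature([50], tf.int64),
        'label': tf.io.FixedLenFeature([1], tf.float32),
    }
    def parse(record):
        example = tf.io.parse_single_example(record, feature_spec)
        label = example.pop('label')
        return example, label
    files = tf.data.Dataset.list_files(data_path)
    return (tf.data.TFRecordDataset(files)
            .map(parse, num_parallel_calls=tf.data.experimental.AUTOTUNE)
            .batch(batch_size)
            .prefetch(1))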