Example #1
def test_PNN(use_inner, use_outter, sparse_feature_num):
    model_name = "PNN"
    sample_size = 64
    feature_dim_dict = {"sparse": {}, 'dense': []}
    for name, num in zip(["sparse", "dense"],
                         [sparse_feature_num, sparse_feature_num]):
        if name == "sparse":
            for i in range(num):
                feature_dim_dict[name][name + '_' +
                                       str(i)] = np.random.randint(1, 10)
        else:
            for i in range(num):
                feature_dim_dict[name].append(name + '_' + str(i))
    sparse_input = [
        np.random.randint(0, dim, sample_size)
        for dim in feature_dim_dict['sparse'].values()
    ]
    dense_input = [
        np.random.random(sample_size) for name in feature_dim_dict['dense']
    ]
    y = np.random.randint(0, 2, sample_size)
    x = sparse_input + dense_input

    model = PNN(feature_dim_dict,
                embedding_size=8,
                hidden_size=[32, 32],
                keep_prob=0.5,
                use_inner=use_inner,
                use_outter=use_outter)
    check_model(model, model_name, x, y)
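Examples #1, #3, and #4 call a `check_model` helper that is defined in the project's test utilities rather than shown here. A minimal sketch of what such a helper typically does, modeled on the compile/fit and save/load steps in Example #2 (the real helper may differ):

def check_model(model, model_name, x, y):
    # Hypothetical reconstruction of the test helper; the real one lives in the test utilities.
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)
    # round-trip the weights to verify that save/load works
    model.save_weights(model_name + '_weights.h5')
    model.load_weights(model_name + '_weights.h5')
    print(model_name + " test pass!")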
Example #2
def test_OPNN():
    name = "OPNN"

    sample_size = 64
    feature_dim_dict = {
        'sparse': {
            'sparse_1': 2,
            'sparse_2': 5,
            'sparse_3': 10
        },
        'dense': ['dense_1', 'dense_2', 'dense_3']
    }
    sparse_input = [
        np.random.randint(0, dim, sample_size)
        for dim in feature_dim_dict['sparse'].values()
    ]
    dense_input = [
        np.random.random(sample_size) for name in feature_dim_dict['dense']
    ]
    y = np.random.randint(0, 2, sample_size)
    x = sparse_input + dense_input

    model = PNN(
        feature_dim_dict,
        embedding_size=8,
        hidden_size=[32, 32],
        use_inner=False,
        use_outter=True,
        keep_prob=0.5,
    )
    model.compile('adam',
                  'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)
    print(name + " test train valid pass!")
    model.save_weights(name + '_weights.h5')
    model.load_weights(name + '_weights.h5')
    print(name + " test save load weight pass!")
    save_model(model, name + '.h5')
    model = load_model(name + '.h5', custom_objects)
    print(name + " test save load model pass!")

    print(name + " test pass!")
Example #3
def test_PNN(use_inner, use_outter, sparse_feature_num):
    model_name = "PNN"
    sample_size = SAMPLE_SIZE
    x, y, feature_dim_dict = get_test_data(sample_size, sparse_feature_num,
                                           sparse_feature_num)
    model = PNN(feature_dim_dict,
                embedding_size=8,
                hidden_size=[32, 32],
                keep_prob=0.5,
                use_inner=use_inner,
                use_outter=use_outter)
    check_model(model, model_name, x, y)
Example #4
def test_PNN(use_inner, use_outter, sparse_feature_num):
    model_name = "PNN"
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num,
                                          sparse_feature_num)
    model = PNN(feature_columns,
                embedding_size=4,
                dnn_hidden_units=[4, 4],
                dnn_dropout=0.5,
                use_inner=use_inner,
                use_outter=use_outter)
    check_model(model, model_name, x, y)
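Examples #3 and #4 build their synthetic inputs through a `get_test_data` helper from the test suite. A sketch of what such a helper can return for the dict-style API of Example #3, assembled from the inline construction in Example #1 (the project's real helper also covers the feature-column API used in Example #4):

import numpy as np

SAMPLE_SIZE = 64  # assumed module-level constant, matching Examples #1 and #2

def get_test_data(sample_size, sparse_feature_num, dense_feature_num):
    # Hypothetical reconstruction based on Example #1; the real helper may differ.
    feature_dim_dict = {"sparse": {}, "dense": []}
    for i in range(sparse_feature_num):
        feature_dim_dict["sparse"]["sparse_" + str(i)] = np.random.randint(1, 10)
    for i in range(dense_feature_num):
        feature_dim_dict["dense"].append("dense_" + str(i))

    sparse_input = [np.random.randint(0, dim, sample_size)
                    for dim in feature_dim_dict["sparse"].values()]
    dense_input = [np.random.random(sample_size)
                   for _ in feature_dim_dict["dense"]]
    x = sparse_input + dense_input
    y = np.random.randint(0, 2, sample_size)
    return x, y, feature_dim_dict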
Example #5
def test_PNN_avazu(data, train, test):

    print("\nTesting PNN on avazu dataset...\n")

    results_activation_function = {"auc": [], "logloss": [], "rmse": []}
    results_dropout = {"auc": [], "logloss": [], "rmse": []}
    results_number_of_neurons = {"auc": [], "logloss": [], "rmse": []}

    auc = 0
    logloss = 0
    rmse = 0

    features_labels = train.columns

    sparse_features_labels = features_labels[1:23]
    target_label = features_labels[0]

    dnn_feature_columns = [
        SparseFeat(
            feat,
            vocabulary_size=data[feat].nunique(),
            embedding_dim=4,
        ) for feat in sparse_features_labels
    ]

    feature_names = get_feature_names(dnn_feature_columns)

    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}

    true_y = test[target_label].values

    print("\t\t-- ACTIVATION FUNCTIONS --\t\t")
    for dnn_activation in dnn_activation_list:
        print("\nTesting {dnn_activation}...".format(
            dnn_activation=dnn_activation))

        # model = PNN(dnn_feature_columns, use_inner=False, use_outter=True, dnn_activation = dnn_activation, task='binary')
        model = PNN(dnn_feature_columns,
                    use_inner=True,
                    use_outter=False,
                    dnn_activation=dnn_activation,
                    task='binary')
        model.compile(
            "adam",
            "binary_crossentropy",
            metrics=['binary_crossentropy'],
        )
        model.fit(
            train_model_input,
            train[target_label].values,
            batch_size=256,
            epochs=10,
            verbose=0,
            validation_split=TEST_PROPORTION,
        )
        pred_y = model.predict(test_model_input, batch_size=256)

        auc = compute_auc(true_y, pred_y)
        logloss = compute_log_loss(true_y, pred_y)
        rmse = compute_rmse(true_y, pred_y)

        results_activation_function["auc"].append(auc)
        results_activation_function["logloss"].append(logloss)
        results_activation_function["rmse"].append(rmse)

    print("\t\t-- DROPOUT RATES --\t\t")
    for dnn_dropout in dnn_dropout_list:
        print("\nTesting {dnn_dropout}...".format(dnn_dropout=dnn_dropout))

        # model = PNN(dnn_feature_columns, use_inner=False, use_outter=True, dnn_dropout = dnn_dropout, task='binary')
        model = PNN(dnn_feature_columns,
                    use_inner=True,
                    use_outter=False,
                    dnn_dropout=dnn_dropout,
                    task='binary')
        model.compile(
            "adam",
            "binary_crossentropy",
            metrics=['binary_crossentropy'],
        )
        model.fit(
            train_model_input,
            train[target_label].values,
            batch_size=256,
            epochs=10,
            verbose=0,
            validation_split=TEST_PROPORTION,
        )
        pred_y = model.predict(test_model_input, batch_size=256)

        auc = compute_auc(true_y, pred_y)
        logloss = compute_log_loss(true_y, pred_y)
        rmse = compute_rmse(true_y, pred_y)

        results_dropout["auc"].append(auc)
        results_dropout["logloss"].append(logloss)
        results_dropout["rmse"].append(rmse)

    print("\t\t-- HIDDEN UNITS --\t\t")
    for dnn_hidden_units in dnn_hidden_units_list:
        print("\nTesting {dnn_hidden_units}...".format(
            dnn_hidden_units=dnn_hidden_units))

        # model = PNN(dnn_feature_columns, use_inner=False, use_outter=True, dnn_hidden_units = dnn_hidden_units, task='binary')
        model = PNN(dnn_feature_columns,
                    use_inner=True,
                    use_outter=False,
                    dnn_hidden_units=dnn_hidden_units,
                    task='binary')
        model.compile(
            "adam",
            "binary_crossentropy",
            metrics=['binary_crossentropy'],
        )
        model.fit(train_model_input,
                  train[target_label].values,
                  batch_size=256,
                  epochs=10,
                  verbose=0,
                  validation_split=TEST_PROPORTION)
        pred_y = model.predict(test_model_input, batch_size=256)

        auc = compute_auc(true_y, pred_y)
        logloss = compute_log_loss(true_y, pred_y)
        rmse = compute_rmse(true_y, pred_y)

        results_number_of_neurons["auc"].append(auc)
        results_number_of_neurons["logloss"].append(logloss)
        results_number_of_neurons["rmse"].append(rmse)

    if PLOT:
        # create_plots("OPNN", "avazu", results_activation_function, "Activation Function", "activation_func", dnn_activation_list)
        # create_plots("OPNN", "avazu", results_dropout, "Dropout Rate", "dropout", dnn_dropout_list)
        # create_plots("OPNN", "avazu", results_number_of_neurons, "Number of Neurons per layer", "nr_neurons", dnn_hidden_units_list)
        create_plots("PNN", "avazu", results_activation_function,
                     "Activation Function", "activation_func",
                     dnn_activation_list)
        create_plots("PNN", "avazu", results_dropout, "Dropout Rate",
                     "dropout", dnn_dropout_list)
        create_plots("PNN", "avazu", results_number_of_neurons,
                     "Number of Neurons per layer", "nr_neurons",
                     dnn_hidden_units_list)
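Example #5 iterates over module-level lists (`dnn_activation_list`, `dnn_dropout_list`, `dnn_hidden_units_list`) and scores each run with `compute_auc`, `compute_log_loss`, and `compute_rmse`, all defined elsewhere in that project. A plausible sketch built on scikit-learn; the concrete sweep values below are assumptions, not the author's exact settings:

import numpy as np
from sklearn.metrics import roc_auc_score, log_loss, mean_squared_error

# Assumed sweep settings (placeholders, not the original experiment's values).
dnn_activation_list = ['relu', 'sigmoid', 'tanh']
dnn_dropout_list = [0.0, 0.25, 0.5]
dnn_hidden_units_list = [(128, 128), (256, 256), (512, 512)]

def compute_auc(true_y, pred_y):
    return roc_auc_score(true_y, pred_y)

def compute_log_loss(true_y, pred_y):
    return log_loss(true_y, pred_y)

def compute_rmse(true_y, pred_y):
    # RMSE between binary labels and predicted click probabilities
    return np.sqrt(mean_squared_error(true_y, pred_y))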
Example #6
        VarLenSparseFeat(SparseFeat('neg_hist_category', train['category'].nunique() + 1,
                         embedding_dim = int(sys.argv[5]), embedding_name='category'), maxlen=max_len, 
                        length_name='seq_length')
    ]
behavior_feature_list = ['itemId', 'category']

if sys.argv[1] == 'DeepFM_UDG':
    model = DeepFM_UDG(linear_feature_columns, dnn_feature_columns, untrainable_features_columns, 
                       (200, 80), uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5]))
elif sys.argv[1] == 'DeepFM':
    model = DeepFM(linear_feature_columns, dnn_feature_columns, [], (200, 80))
elif sys.argv[1] == 'PNN_UDG':
    model = PNN_UDG(dnn_feature_columns, untrainable_features_columns, (200, 80), uid_feature_name=udg_features, 
                    udg_embedding_size=int(sys.argv[5]))
elif sys.argv[1] == 'PNN':
    model = PNN(dnn_feature_columns, untrainable_features_columns, (200, 80))
elif sys.argv[1] == 'WDL':
    model = WDL(linear_feature_columns, dnn_feature_columns, [], (200, 80))
elif sys.argv[1] == 'WDL_UDG':
    model = WDL_UDG(linear_feature_columns, dnn_feature_columns, untrainable_features_columns, (200, 80), uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5]))
elif sys.argv[1] == 'DIEN':
    model = DIEN(fixlen_feature_columns, behavior_feature_list,
             dnn_hidden_units=[200, 80], dnn_dropout=0, gru_type="AUGRU", use_negsampling=True)
elif sys.argv[1] == 'DIEN_UDG':
    model = DIEN_UDG(fixlen_feature_columns, untrainable_features_columns, behavior_feature_list, dnn_hidden_units=[200, 80], dnn_dropout=0, gru_type="AUGRU", use_negsampling=True, uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5]))
elif sys.argv[1] == 'DIN':
    model = DIN(fixlen_feature_columns, behavior_feature_list, dnn_hidden_units=[200, 80], dnn_dropout=0)
elif sys.argv[1] == 'DIN_UDG':
    model = DIN_UDG(fixlen_feature_columns, untrainable_features_columns, behavior_feature_list, dnn_hidden_units=[200, 80], dnn_dropout=0, uid_feature_name=udg_features, udg_embedding_size=int(sys.argv[5]))
    
if sys.argv[4] == 'focal':
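The snippet breaks off at the loss selection, so the body of the 'focal' branch is not shown; presumably it compiles the model with a focal loss instead of plain binary cross-entropy. A minimal Keras-compatible sketch of such a loss (the alpha/gamma defaults and the usage line are assumptions):

import tensorflow as tf
from tensorflow.keras import backend as K

def binary_focal_loss(alpha=0.25, gamma=2.0):
    # Hypothetical focal loss; the original project's implementation is not shown.
    def loss(y_true, y_pred):
        y_true = tf.cast(y_true, y_pred.dtype)
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        # p_t: predicted probability assigned to the true class
        p_t = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred)
        alpha_t = y_true * alpha + (1.0 - y_true) * (1.0 - alpha)
        return K.mean(-alpha_t * K.pow(1.0 - p_t, gamma) * K.log(p_t))
    return loss

# e.g. model.compile('adam', binary_focal_loss(), metrics=['binary_crossentropy'])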
Example #7
    linear_feature_columns = fixlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3.generate input data for model

    # train, test = train_test_split(data, test_size=0.2)
    print("Spltting dataset into train and test sets...\n")
    train, test = train_test_split(data, test_size=0.2)
    # train, test = train_test_split(data_ohe, test_size=0.2)
    
    train_model_input = {name:train[name] for name in feature_names}
    test_model_input = {name:test[name] for name in feature_names}

    # 4.Define Model,train,predict and evaluate
    print("Defining PNN model...\n")
    model = PNN(dnn_feature_columns, task='binary')
    print("Compiling PNN model...\n")
    model.compile("adam", "binary_crossentropy",
                  metrics=['binary_crossentropy'], )
    print("Training the model...\n")
    model.fit(train_model_input, train[target].values,
              batch_size=256, epochs=10, verbose=1, validation_split=0.2, )
        
    print("\nTesting the model...\n")
    pred = model.predict(test_model_input, batch_size=256)
    
    print("test LogLoss", round(log_loss(test[target].values, pred), 4))
    print("test AUC", round(roc_auc_score(test[target].values, pred), 4))
    
    print("\nProgram ended in {time}".format(time = datetime.now() - start_time))