Example #1
0
def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Train one AFM model per cross-validation fold.

    Args:
        dfTrain: training DataFrame (must contain the label column).
        dfTest: test DataFrame; parsed here but only used to build the
            shared feature dictionary / ids.
        folds: iterable of (train_idx, valid_idx) index-list pairs.
        pnn_params: AFM hyper-parameter dict; 'feature_size' and
            'field_size' are filled in from the parsed training data.
    """
    fd = FeatureDictionary(dfTrain=dfTrain,
                           dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS)
    data_parser = DataParser(feat_dict=fd)
    # Xi_train: per-row feature (column) indices
    # Xv_train: per-row feature values matching those indices
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)

    print(dfTrain.dtypes)

    pnn_params['feature_size'] = fd.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    # PEP 8 (E731): a named def instead of a lambda assigned to a name.
    def _get(seq, indices):
        # Select the rows of `seq` at the given fold indices.
        return [seq[i] for i in indices]

    for i, (train_idx, valid_idx) in enumerate(folds):
        Xi_train_, Xv_train_, y_train_ = (_get(Xi_train, train_idx),
                                          _get(Xv_train, train_idx),
                                          _get(y_train, train_idx))
        Xi_valid_, Xv_valid_, y_valid_ = (_get(Xi_train, valid_idx),
                                          _get(Xv_train, valid_idx),
                                          _get(y_train, valid_idx))

        afm = AFM(**pnn_params)
        afm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)
Example #2
0
def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Parse the raw frames into index/value lists and fit one AFM per fold."""
    feat_dict = FeatureDictionary(dfTrain=dfTrain,
                                  dfTest=dfTest,
                                  numeric_cols=config.NUMERIC_COLS,
                                  ignore_cols=config.IGNORE_COLS)
    parser = DataParser(feat_dict=feat_dict)
    # Xi_*: feature indices per sample; Xv_*: the values at those indices.
    Xi_train, Xv_train, y_train = parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = parser.parse(df=dfTest)

    print(dfTrain.dtypes)

    pnn_params['feature_size'] = feat_dict.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    take = lambda seq, idx: [seq[j] for j in idx]

    for i, (train_idx, valid_idx) in enumerate(folds):
        # Slice out this fold's training and validation subsets.
        Xi_tr = take(Xi_train, train_idx)
        Xv_tr = take(Xv_train, train_idx)
        y_tr = take(y_train, train_idx)
        Xi_va = take(Xi_train, valid_idx)
        Xv_va = take(Xv_train, valid_idx)
        y_va = take(y_train, valid_idx)

        afm = AFM(**pnn_params)
        afm.fit(Xi_tr, Xv_tr, y_tr, Xi_va, Xv_va, y_va)
Example #3
0
def plot_afm():
    """Build a small AFM model on a slice of the Criteo data and save a
    diagram of its architecture to ./imgs/AFM.png."""
    # Load the data set, keeping only a few columns to keep the plot small.
    data, dense_features, sparse_features = read_criteo_data()
    dense_features = dense_features[:3]
    sparse_features = sparse_features[:2]

    def _feature_columns():
        # Tag sparse columns as embeddings (SparseFeat) and dense columns
        # as raw 1-d inputs (DenseFeat). The same grouping is used for both
        # the linear part and the DNN part, so build it once here.
        return [
            SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
            for feat in sparse_features
        ] + [DenseFeat(feat, 1) for feat in dense_features]

    # Two independent lists, exactly as the original duplicated code built.
    linear_feature_columns = _feature_columns()
    dnn_feature_columns = _feature_columns()

    # Build the AFM model and render its graph.
    model = AFM(linear_feature_columns, dnn_feature_columns)
    keras.utils.plot_model(model, to_file="./imgs/AFM.png", show_shapes=True)
def train(epochs):
    """Run `epochs` passes over the training set, report test metrics,
    save the weights to disk, and return the trained model."""
    train_ds, test_ds, num_feature = get_data()

    model = AFM(config.NUM_FIELD, num_feature, config.NUM_CONT,
                config.EMBEDDING_SIZE, config.HIDDEN_SIZE)

    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

    print("Start Training: Batch Size: {}, Embedding Size: {}, Hidden Size: {}".\
        format(config.BATCH_SIZE, config.EMBEDDING_SIZE, config.HIDDEN_SIZE))
    start = perf_counter()

    for epoch in range(epochs):
        # Fresh metric trackers each epoch so results are per-epoch.
        acc_metric = BinaryAccuracy(threshold=0.5)
        auc_metric = AUC()
        batch_losses = []

        for features, labels in train_ds:
            batch_losses.append(
                train_on_batch(model, optimizer, acc_metric, auc_metric,
                               features, labels))

        print("Epoch {:03d}: 누적 Loss: {:.4f}, Acc: {:.4f}, AUC: {:.4f}".format(
            epoch, np.mean(batch_losses),
            acc_metric.result().numpy(),
            auc_metric.result().numpy()))

    # Evaluate on the held-out test split with fresh metrics.
    test_acc = BinaryAccuracy(threshold=0.5)
    test_auc = AUC()
    for features, labels in test_ds:
        preds = model(features)
        test_acc.update_state(labels, preds)
        test_auc.update_state(labels, preds)

    print("테스트 ACC: {:.4f}, AUC: {:.4f}".format(test_acc.result().numpy(),
                                                test_auc.result().numpy()))
    print("Batch Size: {}, Embedding Size: {}, Hidden Size: {}".format(
        config.BATCH_SIZE, config.EMBEDDING_SIZE, config.HIDDEN_SIZE))
    print("걸린 시간: {:.3f}".format(perf_counter() - start))
    model.save_weights(
        'weights/weights-epoch({})-batch({})-embedding({})-hidden({}).h5'.
        format(epochs, config.BATCH_SIZE, config.EMBEDDING_SIZE,
               config.HIDDEN_SIZE))

    return model
Example #5
0
                                  val_ratio=0.2,
                                  double_process='min-max',
                                  save_h5_file=data_config['cache_file'],
                                  label='is_y2')
        enc.save(data_config['enc_file'])

    print(enc._field_dim, enc._feature_dim)
    params.update({'feature_size': enc._feature_dim})
    params.update({'field_size': enc._field_dim})

    if model_type.lower() == 'deepfm':
        model = DeepFM(params)
    elif model_type.lower() == 'xdeepfm':
        model = xDeepFM(params)
    elif model_type.lower() == 'afm':
        model = AFM(params)
    else:
        raise ValueError('{} not supported yet'.format(model_type))

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())  # global_step counter etc.
        sys.stdout.flush()
        best_hit_rate = 0
        best_epoch = 0
        best_loss = np.finfo('float32').max
        stop_cnt = 0
        if params['training_model']:
            #---------------training---------------------------------
            for epoch in range(params['epoch']):
                print('epoch ={}'.format(epoch).center(50, '-'))
Example #6
0
                        help='decay rate',
                        type=float,
                        default=0.99)
    args = parser.parse_args(args=[])

    # load data set
    X_train_cate, X_train_cont, y_train, X_test_cate, X_test_cont, y_test, cate_list = load_dataset(
        args.input_dir)

    cate_num = X_train_cate.shape[1]
    cont_num = X_train_cont.shape[1]

    tf.reset_default_graph()
    with tf.Session() as sess:
        # define model
        model = AFM.AFM(args, cate_num, cont_num, cate_list)
        model.build()

        ckpt = tf.train.get_checkpoint_state(
            os.path.join(args.input_dir, args.model_name))
        if ckpt:
            print('Loading model parameters from %s' %
                  ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('Creating model with inital parameters')
            sess.run(tf.global_variables_initializer())

        step = 0
        for epoch in range(args.epoch):
            start_time = time.time()
Example #7
0
from AFM import AFM
import sys
# Optional row limit: the first CLI argument caps how many rows are loaded.
# Collapsed the redundant two-step assignment into one int() conversion.
nrows = None
if len(sys.argv) > 1:
    nrows = int(sys.argv[1])

if __name__ == '__main__':
    # Fix: `path` was assigned but the literal was duplicated in the
    # data_load call; use the variable so the path lives in one place.
    path = '../data/data.csv'

    feature_size, data = data_loader.data_load(path, nrows=nrows)
    features = ['userId', 'movieId', 'tag']

    # 80/20 train/validation split point.
    num = data.shape[0] * 4 // 5

    model = AFM(features, feature_size, embedding_size=8, verbose=False)

    X = data[features].values
    y = data.label.values.reshape(-1, 1)
    # (Removed a dead triple-quoted block of commented-out fit() options.)

    import time

    start = time.time()
    model.fit(X[:num], y[:num], epoch=1)
    print('train a epoch cost %.2f' % (time.time() - start))