def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Run cross-validated training of an AFM model over ``dfTrain``.

    NOTE(review): despite the ``_nfm`` suffix and the ``pnn_params`` name,
    this function instantiates ``AFM`` — presumably copied from an NFM/PNN
    runner; confirm the intended model.

    Args:
        dfTrain: training DataFrame (fed to FeatureDictionary/DataParser).
        dfTest: test DataFrame; parsed but only used here to build the
            feature dictionary (Xi_test/Xv_test/ids_test are unused below).
        folds: iterable of (train_idx, valid_idx) index pairs, one per fold.
        pnn_params: dict of AFM constructor kwargs; mutated in place with
            'feature_size' and 'field_size'.

    Returns:
        None — models are fitted per fold but not returned or collected.
    """
    fd = FeatureDictionary(dfTrain=dfTrain, dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS)
    data_parser = DataParser(feat_dict=fd)
    # Xi_train: per-row feature (column) indices
    # Xv_train: per-row feature values corresponding to those indices
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)
    print(dfTrain.dtypes)
    # Model needs the total vocabulary size and the number of fields per row.
    pnn_params['feature_size'] = fd.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])
    # Select the elements of x at positions l (fancy indexing for plain lists).
    _get = lambda x, l: [x[i] for i in l]
    for i, (train_idx, valid_idx) in enumerate(folds):
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx)
        # A fresh model is trained for every fold; only the last one survives
        # the loop (and is not returned).
        afm = AFM(**pnn_params)
        afm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)
def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Train one AFM model per cross-validation fold of ``dfTrain``.

    ``pnn_params`` is updated in place with 'feature_size' and 'field_size'
    before being splatted into the AFM constructor. Nothing is returned;
    each fold's model is fitted and discarded.
    """
    feat_dict = FeatureDictionary(dfTrain=dfTrain, dfTest=dfTest,
                                  numeric_cols=config.NUMERIC_COLS,
                                  ignore_cols=config.IGNORE_COLS)
    parser = DataParser(feat_dict=feat_dict)
    # Xi: per-row feature indices; Xv: the matching feature values.
    Xi_train, Xv_train, y_train = parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = parser.parse(df=dfTest)
    print(dfTrain.dtypes)
    pnn_params['feature_size'] = feat_dict.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    def _select(seq, positions):
        # Gather seq's elements at the given positions (list fancy-indexing).
        return [seq[j] for j in positions]

    for fold_no, (train_idx, valid_idx) in enumerate(folds):
        fold_train = tuple(_select(arr, train_idx)
                           for arr in (Xi_train, Xv_train, y_train))
        fold_valid = tuple(_select(arr, valid_idx)
                           for arr in (Xi_train, Xv_train, y_train))
        model = AFM(**pnn_params)
        model.fit(*fold_train, *fold_valid)
def plot_afm():
    """Build an AFM model on a small slice of the Criteo features and
    render its architecture diagram to ./imgs/AFM.png."""
    # Load the data and restrict to a few columns to keep the plot readable.
    data, dense_features, sparse_features = read_criteo_data()
    dense_features = dense_features[:3]
    sparse_features = sparse_features[:2]

    def _feature_columns():
        # Tag sparse columns with SparseFeat (vocab size from the data,
        # 4-dim embeddings) and dense columns with DenseFeat.
        sparse_cols = [
            SparseFeat(name, vocabulary_size=data[name].nunique(),
                       embedding_dim=4)
            for name in sparse_features
        ]
        dense_cols = [DenseFeat(name, 1) for name in dense_features]
        return sparse_cols + dense_cols

    # The linear part and the dnn part use the same column specification here;
    # in a real scenario they would be chosen per feature group.
    linear_feature_columns = _feature_columns()
    dnn_feature_columns = _feature_columns()

    # Build the AFM model and dump its graph as an image.
    history = AFM(linear_feature_columns, dnn_feature_columns)
    keras.utils.plot_model(history, to_file="./imgs/AFM.png", show_shapes=True)
def train(epochs):
    """Train an AFM model for ``epochs`` epochs with SGD, report train/test
    accuracy and AUC, save the final weights, and return the model."""
    train_ds, test_ds, num_feature = get_data()
    model = AFM(config.NUM_FIELD, num_feature, config.NUM_CONT,
                config.EMBEDDING_SIZE, config.HIDDEN_SIZE)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
    print("Start Training: Batch Size: {}, Embedding Size: {}, Hidden Size: {}".format(
        config.BATCH_SIZE, config.EMBEDDING_SIZE, config.HIDDEN_SIZE))
    start = perf_counter()

    for epoch in range(epochs):
        # Fresh metric objects each epoch so results are per-epoch, not cumulative.
        epoch_acc = BinaryAccuracy(threshold=0.5)
        epoch_auc = AUC()
        batch_losses = []
        for x, y in train_ds:
            batch_losses.append(
                train_on_batch(model, optimizer, epoch_acc, epoch_auc, x, y))
        print("Epoch {:03d}: 누적 Loss: {:.4f}, Acc: {:.4f}, AUC: {:.4f}".format(
            epoch, np.mean(batch_losses),
            epoch_acc.result().numpy(), epoch_auc.result().numpy()))

    # Evaluate on the held-out test split.
    test_acc = BinaryAccuracy(threshold=0.5)
    test_auc = AUC()
    for x, y in test_ds:
        preds = model(x)
        test_acc.update_state(y, preds)
        test_auc.update_state(y, preds)
    print("테스트 ACC: {:.4f}, AUC: {:.4f}".format(
        test_acc.result().numpy(), test_auc.result().numpy()))
    print("Batch Size: {}, Embedding Size: {}, Hidden Size: {}".format(
        config.BATCH_SIZE, config.EMBEDDING_SIZE, config.HIDDEN_SIZE))
    print("걸린 시간: {:.3f}".format(perf_counter() - start))

    model.save_weights(
        'weights/weights-epoch({})-batch({})-embedding({})-hidden({}).h5'.
        format(epochs, config.BATCH_SIZE, config.EMBEDDING_SIZE,
               config.HIDDEN_SIZE))
    return model
val_ratio=0.2, double_process='min-max', save_h5_file=data_config['cache_file'], label='is_y2') enc.save(data_config['enc_file']) print(enc._field_dim, enc._feature_dim) params.update({'feature_size': enc._feature_dim}) params.update({'field_size': enc._field_dim}) if model_type.lower() == 'deepfm': model = DeepFM(params) elif model_type.lower() == 'xdeepfm': model = xDeepFM(params) elif model_type.lower() == 'afm': model = AFM(params) else: raise ValueError('{} not supported yet'.format(model_type)) with tf.Session(config=sess_config) as sess: sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) # global_step counter etc. sys.stdout.flush() best_hit_rate = 0 best_epoch = 0 best_loss = np.finfo('float32').max stop_cnt = 0 if params['training_model']: #---------------training--------------------------------- for epoch in range(params['epoch']): print('epoch ={}'.format(epoch).center(50, '-'))
help='decay rate', type=float, default=0.99) args = parser.parse_args(args=[]) # load data set X_train_cate, X_train_cont, y_train, X_test_cate, X_test_cont, y_test, cate_list = load_dataset( args.input_dir) cate_num = X_train_cate.shape[1] cont_num = X_train_cont.shape[1] tf.reset_default_graph() with tf.Session() as sess: # define model model = AFM.AFM(args, cate_num, cont_num, cate_list) model.build() ckpt = tf.train.get_checkpoint_state( os.path.join(args.input_dir, args.model_name)) if ckpt: print('Loading model parameters from %s' % ckpt.model_checkpoint_path) model.saver.restore(sess, ckpt.model_checkpoint_path) else: print('Creating model with inital parameters') sess.run(tf.global_variables_initializer()) step = 0 for epoch in range(args.epoch): start_time = time.time()
from AFM import AFM
import sys
import time

if __name__ == '__main__':
    # Optional CLI argument: number of CSV rows to load (all rows if omitted).
    # Parsed inside the guard so importing this module has no side effects.
    nrows = int(sys.argv[1]) if len(sys.argv) > 1 else None

    path = '../data/data.csv'
    # Fix: the original defined `path` but passed the literal string again;
    # pass the variable so the location is configured in one place.
    feature_size, data = data_loader.data_load(path, nrows=nrows)

    features = ['userId', 'movieId', 'tag']
    # 80/20 train/validation split by row position.
    num = data.shape[0] * 4 // 5

    model = AFM(features, feature_size, embedding_size=8, verbose=False)
    X = data[features].values
    y = data.label.values.reshape(-1, 1)

    # Time a single training epoch. (A longer run with early stopping and
    # refit on the validation split was removed as dead commented-out code.)
    start = time.time()
    model.fit(X[:num], y[:num], epoch=1)
    print('train a epoch cost %.2f' % (time.time() - start))