import tensorflow as tf
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.metrics import AUC
from tensorflow.keras.optimizers import Adam
# `AFM` and `create_dataset` come from the project's local modules


def main(sample_num, embed_dim, learning_rate, epochs, batch_size, mode='max', attention_hidden_unit=None):
    """
    :param sample_num: the number of training samples
    :param embed_dim: the dimension of every embedding layer
    :param learning_rate: learning rate for the Adam optimizer
    :param epochs: number of training epochs
    :param batch_size: training batch size
    :param mode: interaction pooling mode (e.g. 'max' or 'att')
    :param attention_hidden_unit: hidden units of the attention network
    :return:
    """
    feature_columns, train_X, test_X, train_y, test_y = create_dataset(sample_num, embed_dim)
    # ============================Build Model==========================
    model = AFM(feature_columns, mode, attention_hidden_unit=attention_hidden_unit)
    model.summary()
    # ============================model checkpoint======================
    # check_path = 'save/afm_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
    # checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
    #                                                 verbose=1, period=5)
    # =========================Compile============================
    model.compile(loss=binary_crossentropy,
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])
    # ===========================Fit==============================
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        # callbacks=[checkpoint],
        batch_size=batch_size,
        validation_split=0.1)
    # ===========================Test==============================
    print('test AUC: %f' % model.evaluate(test_X, test_y)[1])
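# Example invocation of `main`; these hyperparameter values are illustrative only
# (mode='att' selects attention pooling, per the modes used elsewhere in these
# scripts) and are not taken from the original:
if __name__ == '__main__':
    main(sample_num=500000, embed_dim=8, learning_rate=0.001,
         epochs=10, batch_size=4096, mode='att', attention_hidden_unit=8)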
def run_AFM():
    afm = AFM.AFM(field_size, feature_sizes, batch_size=32 * 8, is_shallow_dropout=False,
                  verbose=True, use_cuda=True, weight_decay=0.00002, use_fm=True,
                  use_ffm=False, n_epochs=num_epoch)
    if not online:
        afm.fit(Xi_train, Xv_train, y_train, Xi_test, Xv_test, y_test,
                early_stopping=True, refit=True)
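# `field_size`, `feature_sizes`, `num_epoch`, `online`, and the Xi/Xv/y arrays are
# defined elsewhere in the original script. In the (index, value) data layout that
# FM-style PyTorch implementations typically use, Xi holds the feature index for
# each field and Xv the corresponding feature value.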
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

# Label-encode the categorical (sparse) features
for feat in tqdm(sparse_features):
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])

# Standardize the continuous (dense) features
for feat in tqdm(dense_features):
    mean = data[feat].mean()
    std = data[feat].std()
    data[feat] = (data[feat] - mean) / (std + 1e-12)

# print(data.shape)
# print(data.head())
train, valid = train_test_split(data, test_size=0.1, random_state=42)
# print(train.shape)  # (540000, 40)
# print(valid.shape)  # (60000, 40)

train_dataset = TensorDataset(torch.LongTensor(train[sparse_features].values),
                              torch.FloatTensor(train[dense_features].values),
                              torch.FloatTensor(train['label'].values))
train_loader = DataLoader(dataset=train_dataset, batch_size=args.train_batch_size, shuffle=True)
valid_dataset = TensorDataset(torch.LongTensor(valid[sparse_features].values),
                              torch.FloatTensor(valid[dense_features].values),
                              torch.FloatTensor(valid['label'].values))
valid_loader = DataLoader(dataset=valid_dataset, batch_size=args.eval_batch_size, shuffle=False)

# One embedding vocabulary size per categorical feature
cat_fea_unique = [data[f].nunique() for f in sparse_features]
model = AFM(cat_fea_unique, num_fea_size=len(dense_features))
train_model(model)
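# `train_model` is not defined in this excerpt (in the real script it would be
# defined before the call above). A minimal sketch of what it could look like,
# assuming binary labels, a model that outputs sigmoid probabilities, and the
# `train_loader` built above; the optimizer and epoch count are illustrative:
def train_model(model):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = torch.nn.BCELoss()
    for epoch in range(3):
        model.train()
        total_loss, steps = 0.0, 0
        for cat_fea, num_fea, label in train_loader:
            cat_fea, num_fea, label = cat_fea.to(device), num_fea.to(device), label.to(device)
            optimizer.zero_grad()
            pred = model(cat_fea, num_fea).view(-1)
            loss = criterion(pred, label)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            steps += 1
        print(f'epoch {epoch + 1}: train loss {total_loss / steps:.4f}')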
learning_rate = 0.001
batch_size = 4096
epochs = 10
# ========================== Create dataset =======================
feature_columns, train, test = create_criteo_dataset(file=file,
                                                     embed_dim=embed_dim,
                                                     read_part=read_part,
                                                     sample_num=sample_num,
                                                     test_size=test_size)
train_X, train_y = train
test_X, test_y = test
# ============================Build Model==========================
mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():
    model = AFM(feature_columns, mode, att_vector, activation, dropout, embed_reg)
    model.summary()
    # =========================Compile============================
    model.compile(loss=binary_crossentropy,
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])
# ============================model checkpoint======================
# check_path = 'save/afm_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
# checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
#                                                 verbose=1, period=5)
# ===========================Fit==============================
model.fit(
    train_X,
    train_y,
    epochs=epochs,
    # callbacks=[checkpoint],
    batch_size=batch_size,
    validation_split=0.1)
# ===========================Test==============================
print('test AUC: %f' % model.evaluate(test_X, test_y)[1])
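# Note: under tf.distribute.MirroredStrategy the model must be built and compiled
# inside `strategy.scope()`, and `batch_size` is the *global* batch size, split
# evenly across the available GPUs at each step.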
from model import AFM
from utils import create_criteo_dataset
import tensorflow as tf
from tensorflow.keras import optimizers, losses, metrics
from sklearn.metrics import accuracy_score

if __name__ == '__main__':
    file = 'E:\\PycharmProjects\\推荐算法\\data\\criteo_sample.txt'
    test_size = 0.2
    feature_columns, (X_train, y_train), (X_test, y_test) = \
        create_criteo_dataset(file, test_size=test_size)

    model = AFM(feature_columns, 'att')
    optimizer = optimizers.SGD(0.01)

    # dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    # dataset = dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)
    #
    # model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    # model.fit(dataset, epochs=100)
    # pre = model.predict(X_test)

    summary = tf.summary.create_file_writer('E:\\PycharmProjects\\tensorboard')
    for i in range(100):
        with tf.GradientTape() as tape:
            pre = model(X_train)
            loss = tf.reduce_mean(losses.binary_crossentropy(y_train, pre))
            print(loss.numpy())
        # Apply the gradient step (absent from the original excerpt, but required
        # for the loop to actually train the model)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # Log the training loss through the TensorBoard file writer created above
        with summary.as_default():
            tf.summary.scalar('loss', loss, step=i)
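    # `accuracy_score` is imported above but unused in this excerpt; a plausible
    # final evaluation (thresholding predicted probabilities at 0.5) would be:
    pre = model(X_test)
    y_pred = (pre.numpy().ravel() > 0.5).astype(int)
    print('test accuracy:', accuracy_score(y_test, y_pred))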
metric_train_auc = []
metric_test_auc = []
metric_train_log_loss = []
metric_test_log_loss = []
model_list = ['FM', 'DeepFM']

xi_train, xv_train, y_train = get_deep_fm_data_format(
    train_data_list[i] + '.libsvm', config['field_info'])
xi_test, xv_test, y_test = get_deep_fm_data_format(
    test_data_list[i] + '.libsvm', config['field_info'])

# afm
afm = AFM.AFM(config['field_size'], config['feature_size'], verbose=True, use_cuda=False,
              weight_decay=0.0001, use_fm=True, use_ffm=False)
train_auc, train_loss, valid_auc, valid_loss = \
    afm.fit(xi_train, xv_train, y_train, xi_test, xv_test, y_test,
            early_stopping=True, refit=False)

logging.info('validating')
y_pred_afm = afm.predict_proba(xi_test, xv_test)

# dump AFM predictions
with open('afm_result', 'wb') as f:
    pickle.dump(y_pred_afm, f)
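# The metric lists declared at the top are never filled in this excerpt; the
# per-run values returned by `fit` would presumably be collected like so:
metric_train_auc.append(train_auc)
metric_test_auc.append(valid_auc)
metric_train_log_loss.append(train_loss)
metric_test_log_loss.append(valid_loss)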
learning_rate = 0.001
batch_size = 512
epochs = 5
# ========================== Create dataset =======================
feature_columns, train, test = create_criteo_dataset(file=file,
                                                     embed_dim=embed_dim,
                                                     read_part=read_part,
                                                     sample_num=sample_num,
                                                     test_size=test_size)
train_X, train_y = train
test_X, test_y = test
# ============================Build Model==========================
model = AFM(feature_columns, mode, attention_hidden_unit=attention_hidden_unit)
model.summary()
# ============================model checkpoint======================
# check_path = 'save/afm_weights.epoch_{epoch:04d}.val_loss_{val_loss:.4f}.ckpt'
# checkpoint = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True,
#                                                 verbose=1, period=5)
# =========================Compile============================
model.compile(loss=binary_crossentropy,
              optimizer=Adam(learning_rate=learning_rate),
              metrics=[AUC()])
# ===========================Fit==============================
model.fit(
    train_X,
    train_y,
    epochs=epochs,
    # callbacks=[checkpoint],
    batch_size=batch_size,
    validation_split=0.1)
# ===========================Test==============================
print('test AUC: %f' % model.evaluate(test_X, test_y)[1])
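# To enable checkpointing, uncomment the ModelCheckpoint lines above and pass
# callbacks=[checkpoint] to fit. Note that the `period` argument is deprecated in
# recent Keras releases in favour of `save_freq` (e.g. save_freq='epoch').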