def test_recommend_movies(pretrained, inputs, linear_feature_columns, dnn_feature_columns, DEVICE, top_k=5):
    """Load a trained FiBiNET regressor and print the top-k predicted ratings.

    Parameters:
        pretrained: path to a saved ``state_dict`` (*.h5) produced by training.
        inputs: model input dict (feature name -> values) accepted by ``model.predict``.
        linear_feature_columns / dnn_feature_columns: deepctr-torch feature column specs.
        DEVICE: torch device string, e.g. "cuda:0".
        top_k: number of highest-rated rows to print/collect (default 5,
            matching the original hard-coded limit).

    Returns:
        list of ``movie_id`` values for the top-k predictions.

    NOTE(review): this function reads a module-level ``data`` DataFrame that is
    not passed in (unlike ``recommend_movies``, which takes ``df``) — confirm it
    is defined at module scope before calling.
    """
    # model = xDeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=DEVICE)
    model = FiBiNET(linear_feature_columns, dnn_feature_columns, task='regression', device=DEVICE)
    model.load_state_dict(torch.load(pretrained))

    pred_ans = model.predict(inputs, batch_size=256)
    print(f'Predict rating: {pred_ans}')

    pred_movie_list = []
    # Sort predictions descending; pred_ans is (n, 1), so column 0 holds the scores.
    idx = np.argsort(pred_ans, axis=0)[::-1]
    # Only the first top_k rows are needed — slice instead of looping over all rows.
    for ans_idx in idx[:top_k, 0]:
        print(
            f"Predict rating: {pred_ans[ans_idx][0] :.3f}, movie_id: {data.iloc[ans_idx]['movie_id']}, gender: {data.iloc[ans_idx]['gender']}, age: {data.iloc[ans_idx]['age']}, user_id: {data.iloc[ans_idx]['user_id']}"
        )
        pred_movie_list.append(data.iloc[ans_idx]['movie_id'])
    # print(inputs['movie_id'].iloc[i])
    # print('movie_max',data.loc[:, ['movie_id']].max(axis=0))  # 1682
    return pred_movie_list
def recommend_movies(pretrained, inputs, linear_feature_columns, dnn_feature_columns, DEVICE, df, top_k=2):
    """Load a trained FiBiNET regressor and return the top-k recommended movies.

    Parameters:
        pretrained: path to a saved ``state_dict`` (*.h5) produced by training.
        inputs: model input dict (feature name -> values) accepted by ``model.predict``.
        linear_feature_columns / dnn_feature_columns: deepctr-torch feature column specs.
        DEVICE: torch device string, e.g. "cuda:0".
        df: DataFrame aligned row-for-row with ``inputs``; must contain
            'movie_id', 'movie_title', 'gender', 'age' and one-hot genre columns.
        top_k: number of top-rated movies to return (default 2, matching the
            original hard-coded limit; resolves the old "TODO: Add arguments (rank)").

    Returns:
        (pred_movie_list, pred_movie_genres, pred_rating) — parallel lists of
        movie titles, their genre-name lists, and predicted ratings.
    """
    data = df
    # model = xDeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=DEVICE)
    model = FiBiNET(linear_feature_columns, dnn_feature_columns, task='regression', device=DEVICE)
    model.load_state_dict(torch.load(pretrained))

    pred_ans = model.predict(inputs, batch_size=256)

    pred_movie_list = []
    pred_movie_genres = []
    pred_rating = []
    # Sort predictions descending; pred_ans is (n, 1), so column 0 holds the scores.
    idx = np.argsort(pred_ans, axis=0)[::-1]
    # Every column except these identifiers is treated as a one-hot genre flag.
    movie_genres = [
        col for col in data.columns
        if col not in ['movie_id', 'movie_title', 'unknown']
    ]
    # Only the first top_k rows are needed — slice instead of looping over all rows.
    for ans_idx in idx[:top_k, 0]:
        genres = [
            movie_genre for movie_genre in movie_genres
            if data.iloc[ans_idx][movie_genre] == 1
        ]
        print(
            f"Predict rating: {pred_ans[ans_idx][0] :.3f}, movie_id: {data.iloc[ans_idx]['movie_id']}, movie_title: {data.iloc[ans_idx]['movie_title']}, gender: {data.iloc[ans_idx]['gender']}, age: {data.iloc[ans_idx]['age']}, genres: {genres}"
        )
        pred_movie_list.append(data.iloc[ans_idx]['movie_title'])
        pred_movie_genres.append(genres)
        pred_rating.append(pred_ans[ans_idx][0])
    # print(inputs['movie_id'].iloc[i])
    # print('movie_max',data.loc[:, ['movie_id']].max(axis=0))  # 1682
    return pred_movie_list, pred_movie_genres, pred_rating
def test_FiBiNET(bilinear_type):
    """Smoke-test FiBiNET with the given bilinear interaction type.

    Builds a small synthetic dataset via ``get_test_data`` and runs the shared
    ``check_model`` train/predict harness on it.
    """
    x, y, feature_columns = get_test_data(SAMPLE_SIZE, 3, 3)
    fibinet = FiBiNET(
        feature_columns,
        feature_columns,
        bilinear_type=bilinear_type,
        dnn_hidden_units=[8, 8],
        dnn_dropout=0.5,
    )
    check_model(fibinet, "FiBiNET", x, y)
def train_recommend_movies(csv_file, DEVICE):
    """
    Description:
        Train recommend system on:
            Model: "FiBiNET",
            Target: "rating",
            Input features: ["movie_id", "gender", "age"] + one-hot genre columns,
            Save model to: "save_model/FiBiNET_MSE{}.h5"
    Parameters:
        csv_file: "path to *.csv"
        DEVICE: "cuda:0"
    """
    data = pd.read_csv(csv_file)

    # sparse_features = ["movie_id", "user_id",
    #                    "gender", "age", "occupation", "zip"]
    sparse_features = ["movie_id", "gender", "age"]
    movie_genres = [
        'Action', 'Adventure', 'Animation', 'Childrens', 'Comedy', 'Crime',
        'Documentary', 'Drama', 'Fantasy', 'Film_Noir', 'Horror', 'Musical',
        'Mystery', 'Romance', 'Sci_Fi', 'Thriller', 'War', 'Western'
    ]
    target = ['rating']

    # 1. Label-encode the sparse id features (genre columns are already 0/1
    # flags, so they are appended afterwards without encoding).
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    sparse_features.extend(movie_genres)

    # 2. Count unique values per sparse field to size the embeddings.
    # e.g. SparseFeat(name='movie_id', vocabulary_size=187, embedding_dim=4, ...)
    fixlen_feature_columns = [
        SparseFeat(feat, data[feat].nunique()) for feat in sparse_features
    ]
    linear_feature_columns = fixlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(
        linear_feature_columns + dnn_feature_columns
    )

    # 3. Generate input data for the model: dicts of feature name -> values.
    train, test = train_test_split(data, test_size=0.2)
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}

    # 4. Define model, train, predict and evaluate.
    # model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=DEVICE)
    # model = xDeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=DEVICE)
    model = FiBiNET(linear_feature_columns, dnn_feature_columns,
                    task='regression', device=DEVICE)
    model.compile(
        "adam",
        "mse",
        metrics=['mse'],
    )
    model.fit(
        train_model_input,
        train[target].values,
        batch_size=256,
        epochs=10,
        verbose=2,
        validation_split=0.2,
    )

    pred_ans = model.predict(test_model_input, batch_size=256)
    # Compute the rounded MSE once: it is both reported and embedded in the
    # checkpoint filename.
    test_mse = round(mean_squared_error(test[target].values, pred_ans), 4)
    print("test MSE", test_mse)
    print("test MAE",
          round(mean_absolute_error(test[target].values, pred_ans), 4))

    torch.save(
        model.state_dict(),
        './recommend_system/save_model/FiBiNET_MSE{}.h5'.format(test_mse))
dnn_feature_columns) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2) train_model_input = {name: train[name] for name in feature_names} test_model_input = {name: test[name] for name in feature_names} # 4.Define Model,train,predict and evaluate device = 'cpu' use_cuda = True if use_cuda and torch.cuda.is_available(): print('cuda ready...') device = 'cuda:0' model = FiBiNET(linear_feature_columns, dnn_feature_columns, task='binary', device=device) model.compile( "adam", "binary_crossentropy", metrics=["binary_crossentropy", "auc"], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2) pred_ans = model.predict(test_model_input, batch_size=256) #print(pred_ans) #print(" ")
dnn_feature_columns) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2) train_model_input = {name: train[name] for name in feature_names} test_model_input = {name: test[name] for name in feature_names} # 4.Define Model,train,predict and evaluate device = 'cpu' use_cuda = True if use_cuda and torch.cuda.is_available(): print('cuda ready...') device = 'cuda:0' model = FiBiNET(linear_feature_columns, dnn_feature_columns, task='regression', device=device) model.compile( "adam", "mse", metrics=['mse'], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=87, verbose=2, validation_split=0.2) pred_ans = model.predict(test_model_input, batch_size=256) print("test MSE",