import os

# These variables must be exported *before* TensorFlow is imported: the
# native runtime picks them up while the library loads, so assigning them
# after the import (as was done previously) is too late to silence
# import-time messages.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["KMP_WARNINGS"] = "FALSE"

import tensorflow.compat.v1 as tf  # noqa: E402  (must follow the env setup)

# Only let Python-side TensorFlow messages of level ERROR and above through.
tf.logging.set_verbosity(tf.logging.ERROR)


if __name__ == "__main__":
    # NOTE(review): col_names is not referenced anywhere in the visible part
    # of this script; it is kept in case later code depends on it.
    col_names = [
        "user", "item", "label", "time", "sex",
        "age", "occupation", "genre1", "genre2", "genre3",
    ]
    all_data = pd.read_csv(
        "sample_data/sample_movielens_merged.csv", sep=",", header=0
    )

    # use first half data as first training part
    first_half_data = all_data[: len(all_data) // 2]
    train, test = split_by_ratio_chrono(first_half_data, test_size=0.2)

    # Feature column layout handed to the dataset builder.
    sparse_col = ["sex", "occupation", "genre1", "genre2", "genre3"]
    dense_col = ["age"]
    user_col = ["sex", "age", "occupation"]
    item_col = ["genre1", "genre2", "genre3"]

    train_data, data_info = DatasetFeat.build_trainset(
        train, user_col, item_col, sparse_col, dense_col, shuffle=False
    )
    test_data = DatasetFeat.build_testset(test, shuffle=False)
    print(data_info)

    # Different fixed seeds are used for the train and test negative samples.
    train_data.build_negative_samples(
        data_info, num_neg=1, item_gen_mode="random", seed=2020
    )
    test_data.build_negative_samples(
        data_info, num_neg=1, item_gen_mode="random", seed=2222
    )
# remove unnecessary tensorflow logging
import os

# These variables must be exported *before* TensorFlow is imported: the
# native runtime picks them up while the library loads, so assigning them
# after the import (as was done previously) is too late to silence
# import-time messages.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["KMP_WARNINGS"] = "FALSE"

import tensorflow as tf  # noqa: E402  (must follow the env setup)

# Only let Python-side TensorFlow messages of level ERROR and above through.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


if __name__ == "__main__":
    data = pd.read_csv(
        "sample_data/sample_movielens_merged.csv", sep=",", header=0
    )

    # convert to implicit data and do negative sampling afterwards
    data["label"] = 1

    # split into train and test data based on time
    train_data, test_data = split_by_ratio_chrono(data, test_size=0.2)

    # specify complete columns information
    sparse_col = ["sex", "occupation", "genre1", "genre2", "genre3"]
    dense_col = ["age"]
    user_col = ["sex", "age", "occupation"]
    item_col = ["genre1", "genre2", "genre3"]

    # train_data / test_data are rebound here from the raw splits to the
    # objects returned by the DatasetFeat builders.
    train_data, data_info = DatasetFeat.build_trainset(
        train_data, user_col, item_col, sparse_col, dense_col
    )
    test_data = DatasetFeat.build_testset(test_data, sparse_col, dense_col)

    # sample negative items for each record
    train_data.build_negative_samples(data_info)
    test_data.build_negative_samples(data_info)
import os import tensorflow as tf os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' os.environ["KMP_WARNINGS"] = "FALSE" tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) if __name__ == "__main__": col_names = [ "user", "item", "label", "time", "sex", "age", "occupation", "genre1", "genre2", "genre3" ] data = pd.read_csv("sample_data/sample_movielens_merged.csv", sep=",", header=0) train, test = split_by_ratio_chrono(data, test_size=0.2) sparse_col = ["sex", "occupation", "genre1", "genre2", "genre3"] dense_col = ["age"] user_col = ["sex", "age", "occupation"] item_col = ["genre1", "genre2", "genre3"] train_data, data_info = DatasetFeat.build_trainset(train, user_col, item_col, sparse_col, dense_col, shuffle=False) test_data = DatasetFeat.build_testset(test, shuffle=False) print(data_info) train_data.build_negative_samples(data_info, num_neg=1,