import os

# Set the logging-related environment variables *before* TensorFlow is
# imported. Native libraries may read them as soon as they are loaded, so
# assigning them after the import can be too late to silence import-time
# messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["KMP_WARNINGS"] = "FALSE"

import tensorflow.compat.v1 as tf  # noqa: E402 - must follow the env setup

# Restrict TensorFlow's Python-side logger to errors only.
tf.logging.set_verbosity(tf.logging.ERROR)

if __name__ == "__main__":
    # header=0 makes pandas take the column names from the first row of the
    # CSV, so no explicit list of names is needed here.
    all_data = pd.read_csv("sample_data/sample_movielens_merged.csv",
                           sep=",", header=0)

    # use first half data as first training part
    first_half_data = all_data[:(len(all_data) // 2)]
    train, test = split_by_ratio_chrono(first_half_data, test_size=0.2)

    # Column groups handed to the dataset builder.
    sparse_col = ["sex", "occupation", "genre1", "genre2", "genre3"]
    dense_col = ["age"]
    user_col = ["sex", "age", "occupation"]
    item_col = ["genre1", "genre2", "genre3"]

    # Build the train set first: it also returns the `data_info` object that
    # the sampling calls below require. Shuffling is disabled for both sets.
    train_data, data_info = DatasetFeat.build_trainset(train, user_col, item_col,
                                                       sparse_col, dense_col,
                                                       shuffle=False)
    test_data = DatasetFeat.build_testset(test, shuffle=False)
    print(data_info)

    # Build negative samples for both sets; each call uses its own fixed seed.
    train_data.build_negative_samples(data_info, num_neg=1,
                                      item_gen_mode="random", seed=2020)
    test_data.build_negative_samples(data_info, num_neg=1,
                                     item_gen_mode="random", seed=2222)
Ejemplo n.º 2
0
# remove unnecessary tensorflow logging
import os

# The environment variables are assigned *before* TensorFlow is imported.
# Native libraries may read them as soon as they are loaded, so assigning
# them after the import can be too late to silence import-time messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["KMP_WARNINGS"] = "FALSE"

import tensorflow as tf  # noqa: E402 - must follow the env setup

# Restrict TensorFlow's Python-side logger to errors only.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

if __name__ == "__main__":
    data = pd.read_csv(
        "sample_data/sample_movielens_merged.csv", sep=",", header=0
    )
    # Treat every record as a positive interaction (implicit data); the
    # negative items are sampled at the end of this script.
    data["label"] = 1

    # Complete column information handed to the dataset builders.
    user_col = ["sex", "age", "occupation"]
    item_col = ["genre1", "genre2", "genre3"]
    sparse_col = ["sex", "occupation", "genre1", "genre2", "genre3"]
    dense_col = ["age"]

    # Time-based split into train and test frames.
    train_frame, test_frame = split_by_ratio_chrono(data, test_size=0.2)

    # The train set is built first because it also yields `data_info`.
    train_data, data_info = DatasetFeat.build_trainset(
        train_frame, user_col, item_col, sparse_col, dense_col
    )
    test_data = DatasetFeat.build_testset(test_frame, sparse_col, dense_col)

    # Sample negative items for each record: train first, then test.
    for dataset in (train_data, test_data):
        dataset.build_negative_samples(data_info)
Ejemplo n.º 3
0
import os

# Set the logging-related environment variables *before* TensorFlow is
# imported. Native libraries may read them as soon as they are loaded, so
# assigning them after the import can be too late to silence import-time
# messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["KMP_WARNINGS"] = "FALSE"

import tensorflow as tf  # noqa: E402 - must follow the env setup

# Restrict TensorFlow's Python-side logger to errors only.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

if __name__ == "__main__":
    col_names = [
        "user", "item", "label", "time", "sex", "age", "occupation", "genre1",
        "genre2", "genre3"
    ]
    data = pd.read_csv("sample_data/sample_movielens_merged.csv",
                       sep=",",
                       header=0)
    train, test = split_by_ratio_chrono(data, test_size=0.2)

    sparse_col = ["sex", "occupation", "genre1", "genre2", "genre3"]
    dense_col = ["age"]
    user_col = ["sex", "age", "occupation"]
    item_col = ["genre1", "genre2", "genre3"]
    train_data, data_info = DatasetFeat.build_trainset(train,
                                                       user_col,
                                                       item_col,
                                                       sparse_col,
                                                       dense_col,
                                                       shuffle=False)
    test_data = DatasetFeat.build_testset(test, shuffle=False)
    print(data_info)
    train_data.build_negative_samples(data_info,
                                      num_neg=1,