Exemple #1
0
# Dataset selection.
# Exactly one (file_path, trust_path, dataset) group should be active at a
# time; to switch datasets, comment out the active group and uncomment
# another one.
# NOTE(review): trust_path is assigned here but not read in the visible part
# of this script -- confirm whether it is still needed.

# -- ciaodvd (disabled) --
# file_path = "./data/ciaodvd/movie-ratings.txt"
# trust_path = "./data/ciaodvd/trusts.txt"
# dataset = "ciaodvd"

# -- epinions (active) --
file_path = "./data/epinions/ratings_data.txt"
trust_path = "./data/epinions/trust_data.txt"
dataset = "epinions"

# -- filmtrust (disabled) --
# file_path = "./data/filmtrust/ratings.txt"
# trust_path = "./data/filmtrust/trust.txt"
# dataset = "filmtrust"


# Preprocess data
data_class = Preprocessor(file_path, dataset)

# print_stats() is called before and after cleaning, presumably so the effect
# of the cleaning step can be compared in the output.
# The status message names the strategy that actually runs (threshold
# cleaning); the percentage variant below is disabled.
print("gonna threshold clean")
data_class.print_stats()
# data_class.percentage_cleaning(percent=0.10, alpha_i=3, alpha_u=2)
data_class.threshold_cleaning(min_items=20, min_users=20)
data_class.print_stats()

# Hyperparameters forwarded to ILRecSys below as its `c` and `n` arguments.
c = 1
n = 5

entry_list = data_class.return_ratings_list()

# Initialize recommender system
# NOTE(review): the recommender is constructed with an empty list rather than
# entry_list -- presumably the ratings are fed in later; confirm.
recsys = ILRecSys(
    [],
    black_box="knn",
    params={"corr": "pearson", "strat": "top_k", "param": 30},
    n=n,
    c=c,
    loss_type="squared",
)
import os
from copy import deepcopy

# will use this config file to set settings
YAML_FILE = "./config/shills.yml"
with open(YAML_FILE, 'r') as file:
    # Use safe_load: calling yaml.load() without an explicit Loader is
    # deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6.
    # safe_load only builds plain Python types, which is all a settings
    # file of nested mappings and scalars needs.
    y_d = yaml.safe_load(file)

# The config names the dataset to run on and maps each dataset to its files:
# y_d["paths"][<dataset>]["ratings"] is the ratings file handed to Preprocessor.
dataset = y_d["dataset"]
data_file = y_d["paths"][dataset]["ratings"]

print("----Dataset Details----")
print(f"Dataset: {dataset}")

# Preprocess data
data_class = Preprocessor(data_file, dataset)

if y_d["clean"]["type_of"] == "percent":
    # Cleans using percentage cleaning (matrix has to be x% filled)
    alpha_i = y_d["clean"]["alpha_i"]
    alpha_u = y_d["clean"]["alpha_u"]
    data_class.percentage_cleaning(percent=y_d["clean"]["percent"],
                                   alpha_i=alpha_i,
                                   alpha_u=alpha_u)
    print("Cleaning Strategy: Percentage")
    print(f'Cleaning Percentage: {y_d["clean"]["percent"]}')
    print(f"Cleaning Params: {alpha_i} {alpha_u}")
elif y_d["clean"]["type_of"] == "threshold":
    # Cleans until all items meet the min number of users and viceversa
    min_items = y_d["clean"]["min_items"]
    min_users = y_d["clean"]["min_users"]