def get_movies_dict_from_file():
    """Read the MovieLens 100-k ``u.item`` file and map raw movie ids to
    ``(title, release_date)`` tuples.

    Returns:
        dict: raw movie id (str) -> (movie title, release date) tuple.
    """
    # Fixes two defects vs. the original:
    #  * the path was built without the leading '/' that every sibling
    #    reader uses, producing '<data_dir>ml-100k/...' instead of
    #    '<data_dir>/ml-100k/...';
    #  * the file was opened without an encoding — u.item contains
    #    ISO-8859-1 characters and fails to decode under a UTF-8 default.
    path = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    movies_dict = {}
    with open(path, encoding='ISO-8859-1') as f:
        for line in f:
            fields = line.split('|')
            movies_dict[fields[0]] = (fields[1], fields[2])
    return movies_dict
def read():
    """Return a dict mapping each raw movie id in the MovieLens 100-k
    ``u.item`` file to a ``(title, release_date)`` tuple."""
    path = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    mapping = {}
    with io.open(path, 'r', encoding='ISO-8859-1') as fh:
        for raw in fh:
            fields = raw.split('|')
            # field 0: raw id, field 1: title, field 2: release date
            mapping[fields[0]] = (fields[1], fields[2])
    return mapping
def read_item_names():
    """Return a dict mapping raw movie ids to movie titles, read from the
    MovieLens 100-k ``u.item`` file."""
    path = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    id_to_title = {}
    with io.open(path, 'r', encoding='ISO-8859-1') as fh:
        for raw in fh:
            fields = raw.split('|')
            id_to_title[fields[0]] = fields[1]
    return id_to_title
def id_to_user():
    """Read the MovieLens 100-k ``u.user`` file and return a dict mapping
    each raw user id to a per-user record with the keys ``'Identificador'``
    (the id itself), ``'Idade'`` (age) and ``'Genero'`` (gender)."""
    path = get_dataset_dir() + '/ml-100k/ml-100k/u.user'
    users = {}
    with io.open(path, 'r', encoding='ISO-8859-1') as fh:
        for raw in fh:
            fields = raw.split('|')
            users[fields[0]] = {
                'Identificador': fields[0],
                'Idade': fields[1],
                'Genero': fields[2],
            }
    return users
def get_rid_to_item_mapping():
    """Read the ``u.item`` file from the MovieLens 100-k dataset and return
    a single mapping from raw movie id to a ``(title, release_date)``
    tuple."""
    path = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    rid_to_item = {}
    with io.open(path, 'r', encoding='ISO-8859-1') as fh:
        for raw in fh:
            fields = raw.split('|')
            rid_to_item[fields[0]] = (fields[1], fields[2])
    return rid_to_item
def read_user_names():
    """Read the ``u.data`` ratings file from the MovieLens 100-k dataset
    and collect every raw user id that appears in it.

    Returns:
        dict: raw user id (str) -> the same raw user id (str); effectively
        the set of user ids, kept as an identity dict for compatibility
        with existing callers.
    """
    # Fixes vs. the original: the copy-pasted docstring described u.item /
    # movie-name mappings, and an unused local ``name_to_rid`` was removed.
    file_name = get_dataset_dir() + '/ml-100k/ml-100k/u.data'
    user_id = {}
    with io.open(file_name, 'r', encoding='ISO-8859-1') as f:
        for line in f:
            fields = line.split('\t')
            # u.data is tab-separated: user id is the first field.
            user_id[fields[0]] = fields[0]
    return user_id
def read_item_names():
    """Read u.item from the MovieLens 100-k dataset and return two
    mappings: raw movie id -> title, and title -> raw movie id."""
    path = get_dataset_dir() + "/ml-100k/ml-100k/u.item"
    id_to_title, title_to_id = {}, {}
    with io.open(path, "r", encoding="ISO-8859-1") as fh:
        for raw in fh:
            fields = raw.split("|")
            movie_id, title = fields[0], fields[1]
            id_to_title[movie_id] = title
            title_to_id[title] = movie_id
    return id_to_title, title_to_id
def read_item_names():
    """Build and return the two lookup tables for MovieLens 100-k items:
    raw movie id -> title and the inverse title -> raw movie id."""
    path = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    forward = {}
    inverse = {}
    with io.open(path, 'r', encoding='ISO-8859-1') as fh:
        for record in fh:
            parts = record.split('|')
            forward[parts[0]] = parts[1]
            inverse[parts[1]] = parts[0]
    return forward, inverse
# Loads move dataset data = Dataset.load_builtin("ml-100k") # Gets top 5 rated movies for each user ratings = pd.DataFrame({"userID": [rating[0] for rating in data.raw_ratings], "movieID": [rating[1] for rating in data.raw_ratings], "Rating": [rating[2] for rating in data.raw_ratings]}) \ .groupby(["userID"]) # Saves those that are 4 or 5 stars topMovies = [[ rating[1] for rating in np.array( ratings.get_group(str(uid)).sort_values( ["Rating"], ascending=False).head(5)) if rating[2] > 3.0 ] for uid in list(ratings.groups)] # Runs apriori algorithm on movies L, supportData = apriori(topMovies) rules = generateRules(L, supportData) # Builds dict to convert IDs to names file_name = get_dataset_dir() + '/ml-100k/ml-100k/u.item' id_to_name = {} with io.open(file_name, 'r', encoding='ISO-8859-1') as f: for line in f: line = line.split('|') id_to_name[int(line[0])] = line[1] pntRules(rules, id_to_name)
    # Tail of a similarity-options dict whose opening brace is outside this
    # chunk; 'min_support' is set from an out-of-view `films_count`.
    'user_based': True,
    'min_support': films_count
}
# Train a KNN-with-baseline model on the (out-of-view) trainset.
algo = surprise.KNNBaseline(k=k, sim_options=sim_options)
algo.fit(trainset)

# Predict ratings only for the (user, item) pairs the target user has not
# rated yet. NOTE(review): `id` shadows the builtin and is presumably the
# target raw user id — confirm against the caller.
testset = trainset.build_anti_testset()
testset = filter(lambda x: x[0] == id, testset)
predictions = algo.test(testset)

# Collect (item, estimated rating) pairs per user, rounded to 3 decimals.
top_n = defaultdict(list)
for uid, iid, _, est, _ in predictions:
    top_n[uid].append((iid, round(est, 3)))

# Keep only each user's 5 highest estimated ratings.
for uid, user_ratings in top_n.items():
    user_ratings.sort(key=lambda x: x[1], reverse=True)
    top_n[uid] = user_ratings[:5]

# Map raw movie id -> (title, release date) for display.
# NOTE(review): opened without an explicit encoding, unlike the other
# u.item readers that use ISO-8859-1 — this may fail on non-ASCII titles.
file_name = surprise.get_dataset_dir() + '/ml-100k/ml-100k/u.item'
item = {}
with open(file_name, 'r') as f:
    for line in f:
        line = line.split('|')
        item[line[0]] = (line[1], line[2])

# Print the target user's top-5 recommendations with their movie details.
print(f'User {id}:')
for movie_id, rating in top_n[id]:
    print(str(movie_id) + "\t" + str(rating) + "\t" + str(item[movie_id]))
# Create list of tuples to append to test set for i in range(0, 5): temp_input = input("Rating for " + movies[i] + ": ") while int(temp_input) not in [1, 2, 3, 4, 5]: temp_input = input("Rating for " + movies[i] + ": ") my_tuple = (uid_for_new_user, m_id[i], float(temp_input), time) input_list.append(my_tuple) return input_list if __name__ == "__main__": # Get Data # file_path = os.path.expanduser('C:/cygwin64/home/jaipe/Machine Learning/ml-100k/u.data') file_path = os.path.expanduser(get_dataset_dir() + '/ml-100k/ml-100k/u.data') reader = Reader(line_format='user item rating timestamp', sep='\t') data = Dataset.load_from_file(file_path, reader=reader) # Get the mappings: raw id <-> movie name rid_to_name, name_to_rid = read_item_names() # Ask for user ratings new_uid = '1500' my_input_list = get_user_recs(uid_for_new_user=new_uid, dev=False) # Add to new user data to train on for tup in my_input_list: data.raw_ratings.append(tup)