Example #1
0
def get_movies_dict_from_file():
    """Read the MovieLens 100k ``u.item`` file and map each raw movie id
    to a tuple of its second and third fields (presumably title and
    release date — confirm against the dataset README).

    Returns:
        dict: raw movie id (str) -> (str, str) tuple.
    """
    # Fix: the path previously lacked the leading '/', unlike every other
    # example in this file, so it only worked if get_dataset_dir()
    # happened to return a trailing slash.
    pth = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    movies_dict = {}
    # Fix: u.item is Latin-1 encoded (the sibling readers in this file all
    # open it with ISO-8859-1); without an explicit encoding this raises
    # UnicodeDecodeError on accented titles under a UTF-8 default locale.
    with open(pth, encoding='ISO-8859-1') as f:
        for line in f:
            ln_lst = line.split('|')
            movies_dict[ln_lst[0]] = (ln_lst[1], ln_lst[2])
    return movies_dict
Example #2
0
def read():
    """Map each raw movie id in u.item to a tuple of its second and third
    fields (presumably title and release date — confirm against the
    dataset README).
    """
    path = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    id_to_info = {}
    with io.open(path, 'r', encoding='ISO-8859-1') as src:
        for raw in src:
            fields = raw.split('|')
            id_to_info[fields[0]] = (fields[1], fields[2])
    return id_to_info
Example #3
0
def read_item_names():
    """Map each raw movie id in u.item to its second field (presumably
    the movie title — confirm against the dataset README).
    """
    path = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    with io.open(path, 'r', encoding='ISO-8859-1') as src:
        # Last occurrence of a duplicated id wins, same as the plain-loop form.
        return {fields[0]: fields[1]
                for fields in (raw.split('|') for raw in src)}
Example #4
0
def id_to_user():
    """Map each raw user id in u.user to a dict holding the record's
    first three fields (keyed 'Identificador', 'Idade', 'Genero' —
    Portuguese for id, age, gender).
    """
    path = get_dataset_dir() + '/ml-100k/ml-100k/u.user'
    users = {}
    with io.open(path, 'r', encoding='ISO-8859-1') as src:
        for raw in src:
            fields = raw.split('|')
            users[fields[0]] = {
                'Identificador': fields[0],
                'Idade': fields[1],
                'Genero': fields[2],
            }
    return users
def get_rid_to_item_mapping():
    """Read the u.item file from the MovieLens 100-k dataset and return a
    single mapping from raw movie id to a tuple of the record's second
    and third fields (presumably title and release date — confirm against
    the dataset README).

    Note: despite the original docstring, only one mapping is returned.
    """
    file_name = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    rid_to_name = {}
    with io.open(file_name, 'r', encoding='ISO-8859-1') as f:
        for line in f:
            # Pipe-separated record; line[0] is the raw id.
            line = line.split('|')
            rid_to_name[line[0]] = (line[1], line[2])

    return rid_to_name
Example #6
0
def read_user_names():
    """Read the u.data ratings file from the MovieLens 100-k dataset and
    return a mapping of each raw user id to itself (i.e. the set of user
    ids that appear in the ratings, as a dict).

    Note: the original docstring wrongly described u.item / movie-name
    mappings; this function only collects user ids from u.data.

    Returns:
        dict: raw user id (str) -> raw user id (str).
    """
    file_name = get_dataset_dir() + '/ml-100k/ml-100k/u.data'
    user_id = {}
    with io.open(file_name, 'r', encoding='ISO-8859-1') as f:
        for line in f:
            # u.data is tab-separated; the user id is the first field.
            line = line.split('\t')
            user_id[line[0]] = line[0]

    # Removed the unused local `name_to_rid` from the original.
    return user_id
Example #7
0
def read_item_names():
    """Read u.item from the MovieLens 100-k dataset and return two dicts:
    raw movie id -> name and name -> raw movie id.
    """
    path = get_dataset_dir() + "/ml-100k/ml-100k/u.item"
    rid_to_name = {}
    name_to_rid = {}
    with io.open(path, "r", encoding="ISO-8859-1") as src:
        for record in src:
            fields = record.split("|")
            raw_id, name = fields[0], fields[1]
            rid_to_name[raw_id] = name
            name_to_rid[name] = raw_id
    return rid_to_name, name_to_rid
def read_item_names():
    """Read u.item from the MovieLens 100-k dataset and return two dicts:
    raw movie id -> name and name -> raw movie id.
    """
    file_path = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    id_name_pairs = []
    with io.open(file_path, 'r', encoding='ISO-8859-1') as fh:
        for record in fh:
            parts = record.split('|')
            id_name_pairs.append((parts[0], parts[1]))
    # dict() keeps last-wins semantics, matching the original loop.
    rid_to_name = dict(id_name_pairs)
    name_to_rid = {name: raw_id for raw_id, name in id_name_pairs}
    return rid_to_name, name_to_rid
# Load the built-in MovieLens 100k dataset.
data = Dataset.load_builtin("ml-100k")

# Group every raw rating by its user id.
user_col = [r[0] for r in data.raw_ratings]
movie_col = [r[1] for r in data.raw_ratings]
score_col = [r[2] for r in data.raw_ratings]
ratings = pd.DataFrame({"userID": user_col,
                        "movieID": movie_col,
                        "Rating": score_col}).groupby(["userID"])

# For each user, keep the movie ids among their 5 highest-rated movies
# whose rating exceeds 3 stars.
topMovies = []
for uid in list(ratings.groups):
    best = ratings.get_group(str(uid)).sort_values(
        ["Rating"], ascending=False).head(5)
    topMovies.append([row[1] for row in np.array(best) if row[2] > 3.0])

# Mine frequent itemsets and association rules from the per-user lists.
L, supportData = apriori(topMovies)
rules = generateRules(L, supportData)

# Build the raw-movie-id -> name lookup from u.item (Latin-1 encoded).
file_name = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
id_to_name = {}
with io.open(file_name, 'r', encoding='ISO-8859-1') as f:
    for record in f:
        fields = record.split('|')
        id_to_name[int(fields[0])] = fields[1]

pntRules(rules, id_to_name)
Example #10
0
    # NOTE(review): this snippet is cut off above — the opening of the
    # `sim_options` dict, plus `films_count`, `k`, `trainset` and `id`,
    # are defined outside this excerpt.
    'user_based': True,
    'min_support': films_count
}
# Fit a KNN-Baseline recommender with the similarity options above.
algo = surprise.KNNBaseline(k=k, sim_options=sim_options)
algo.fit(trainset)

# Predict for every (user, item) pair absent from the trainset, restricted
# to the single user `id`.  NOTE(review): `id` shadows the builtin.
testset = trainset.build_anti_testset()
testset = filter(lambda x: x[0] == id, testset)

predictions = algo.test(testset)

# Collect predictions per user: uid -> [(item id, rounded estimate), ...]
top_n = defaultdict(list)
for uid, iid, _, est, _ in predictions:
    top_n[uid].append((iid, round(est, 3)))

# Keep only each user's five highest-estimated items.
for uid, user_ratings in top_n.items():
    user_ratings.sort(key=lambda x: x[1], reverse=True)
    top_n[uid] = user_ratings[:5]

# Build a raw-id -> (field 1, field 2) lookup from u.item (presumably
# title and release date — confirm against the dataset README).
# NOTE(review): opened without the ISO-8859-1 encoding other examples
# use; may raise UnicodeDecodeError on accented titles — verify.
file_name = surprise.get_dataset_dir() + '/ml-100k/ml-100k/u.item'
item = {}

with open(file_name, 'r') as f:
    for line in f:
        line = line.split('|')
        item[line[0]] = (line[1], line[2])

print(f'User {id}:')

# Print the top-5 recommendations with their predicted ratings.
for movie_id, rating in top_n[id]:
    print(str(movie_id) + "\t" + str(rating) + "\t" + str(item[movie_id]))
        # Create list of tuples to append to test set
        # NOTE(review): this is the tail of a function whose signature is
        # outside this excerpt; `movies`, `m_id`, `uid_for_new_user`,
        # `time` and `input_list` are defined earlier in it.
        for i in range(0, 5):
            temp_input = input("Rating for " + movies[i] + ": ")
            # Re-prompt until an integer rating of 1-5 is entered.
            # NOTE(review): int() raises ValueError on non-numeric input —
            # unhandled here.
            while int(temp_input) not in [1, 2, 3, 4, 5]:
                temp_input = input("Rating for " + movies[i] + ": ")

            # (raw uid, raw item id, rating, timestamp) tuple.
            my_tuple = (uid_for_new_user, m_id[i], float(temp_input), time)
            input_list.append(my_tuple)

    return input_list


if __name__ == "__main__":
    # Get Data
    # file_path = os.path.expanduser('C:/cygwin64/home/jaipe/Machine Learning/ml-100k/u.data')
    # Load the raw MovieLens 100k ratings file with an explicit schema.
    file_path = os.path.expanduser(get_dataset_dir() +
                                   '/ml-100k/ml-100k/u.data')
    reader = Reader(line_format='user item rating timestamp', sep='\t')
    data = Dataset.load_from_file(file_path, reader=reader)

    # Get the mappings: raw id <-> movie name
    rid_to_name, name_to_rid = read_item_names()

    # Ask for user ratings
    # '1500' is presumably chosen to not collide with existing raw user
    # ids in the dataset — confirm against the dataset README.
    new_uid = '1500'
    my_input_list = get_user_recs(uid_for_new_user=new_uid, dev=False)

    # Add to new user data to train on
    # Append the interactive ratings to the raw ratings before training.
    # NOTE(review): this block appears truncated at this point in the
    # excerpt and likely continues below.
    for tup in my_input_list:
        data.raw_ratings.append(tup)