Ejemplo n.º 1
0
def main_sslim():
    """Load the rating and similarity matrices and fit a ReccomenderSslim.

    Every ``.npz`` file is read with ``sps.load_npz`` and converted to CSR.
    The recommender is built on the full URM and fitted with
    ``train="-train"``. The normalized matrix, the validator and the target
    user list are created but not used afterwards in this function; the
    calls are kept because they may have side effects.
    """
    def load_csr(path):
        # Read a saved sparse matrix and force CSR format.
        return sps.csr_matrix(sps.load_npz(path))

    urm_all = load_csr("../../Dataset/URM/data_all.npz")
    urm_test = load_csr("../../Dataset/URM/data_test.npz")

    content_sim = load_csr("../../Dataset/old/similarities/CB-Sim.npz")
    collab_sim = load_csr("../../Dataset/old/similarities/Col-Sim.npz")
    slim_sim = load_csr("../../Dataset/old/similarities/Slim-Sim.npz")

    # Smallest and largest stored values across the first two similarity
    # matrices; passed to normalize() together with the SLIM similarity.
    lowest = min(min(content_sim.data), min(collab_sim.data))
    highest = max(max(content_sim.data), max(collab_sim.data))
    slim_sim_normalized = normalize(slim_sim, lowest, highest)

    recommender = ReccomenderSslim(urm_all)
    validator = validate(urm_test, [10])
    target_users = util.get_target_users("../../Dataset/target_users.csv",
                                         seek=8)
    #similarity_matrix = mauri.similarityMatrixTopK(0.31*URM_1 + 1.82*URM_2 + 0.76*URM_4, k=25)
    recommender.fit(train="-train")
Ejemplo n.º 2
0
def create_clusters(n_users=30911, min_cluster_size=600):
    """Group user ids into clusters based on their age and region features.

    Users listed in the cold-users file form their own bucket. Every other
    user id in ``range(n_users)`` is placed as follows:

    * age and region both known: the single cluster for that pair;
    * only region known: one cluster per value in ``range(0, max(age))``;
    * only age known: one cluster per value in
      ``range(min(region), max(region))``;
    * neither known: the "unassigned" bucket.

    Afterwards every bucket with fewer than ``min_cluster_size`` members is
    merged into a shared overflow bucket. The overflow bucket itself is
    returned whenever it is non-empty, whatever its size.

    Args:
        n_users: Number of user ids to consider (ids ``0 .. n_users - 1``).
        min_cluster_size: Buckets smaller than this are merged into the
            overflow bucket.

    Returns:
        A list of clusters, each a list of user ids.
    """
    # Bucket keys with a special meaning. Every other key is the decimal
    # concatenation of an age value and a region value.
    # NOTE(review): concatenated keys can collide with each other (e.g.
    # age 1 / region 12 vs. age 11 / region 2) and with these special
    # keys (age 0 / region 0 -> 0); kept as in the original - confirm the
    # feature value ranges make this impossible.
    unassigned_key = 0
    overflow_key = 10000
    cold_key = 10001

    cold = utils.get_target_users("../../Dataset/target_users_cold.csv",
                                  seek=8)
    _, user_age, age = utils.create_tuples(
        "../../Dataset/UCM/UCM_age.csv", 13)
    _, user_region, region = utils.create_tuples(
        "../../Dataset/UCM/UCM_region.csv", 13)

    # O(1) lookups instead of repeated list scans. setdefault keeps the
    # first occurrence of each user, matching what list.index() returned.
    cold_set = set(cold)
    age_of = {}
    for user, value in zip(user_age, age):
        age_of.setdefault(user, value)
    region_of = {}
    for user, value in zip(user_region, region):
        region_of.setdefault(user, value)

    # Values used to fan out a user whose age (or region) is unknown.
    # NOTE(review): both ranges exclude the maximum value, and ages start
    # at 0 while regions start at min(region); bounds are kept exactly as
    # in the original - confirm whether they should be inclusive. If a
    # range is empty, such a user ends up in no cluster.
    age_fill = range(0, max(age)) if len(age) else range(0)
    region_fill = (range(min(region), max(region))
                   if len(region) else range(0))

    users_dict = dict()
    unassigned = []

    # Iterate over the ids directly and collect leftovers in a new list;
    # removing from a list while looping over it skips elements.
    for u in range(n_users):
        if u in cold_set:
            continue

        target_age = age_of.get(u, -1)
        target_region = region_of.get(u, -1)

        if target_age >= 0 and target_region >= 0:
            keys = [int(str(target_age) + str(target_region))]
        elif target_age < 0 and target_region >= 0:
            keys = [int(str(i) + str(target_region)) for i in age_fill]
        elif target_age >= 0 and target_region < 0:
            keys = [int(str(target_age) + str(i)) for i in region_fill]
        else:
            unassigned.append(u)
            continue

        for key in keys:
            users_dict.setdefault(key, []).append(u)

    overflow = []
    users_dict[unassigned_key] = unassigned
    users_dict[overflow_key] = overflow
    users_dict[cold_key] = cold

    print(users_dict)

    # Merge undersized buckets into the overflow bucket. The overflow
    # bucket is never merged into itself (that would append to a list
    # while iterating it) and is never dropped before later buckets had
    # the chance to be merged into it.
    clusters = []
    for key, members in users_dict.items():
        if key != overflow_key and len(members) < min_cluster_size:
            overflow.extend(members)
        else:
            clusters.append(members)

    if not overflow:
        clusters = [c for c in clusters if c is not overflow]

    return clusters
Ejemplo n.º 3
0
import scipy.sparse as sps
import utils_new as util
import numpy as np

# Uncomment to generate new dataset
#util.createDataset(".")

# Load the interaction matrix in CSR form so that indptr describes rows.
URM = sps.load_npz("data_all.npz").tocsr()

targetUsers = util.get_target_users("../target_users.csv", seek=8)
print(len(targetUsers))
print(max(targetUsers))

# Number of stored entries in each row of URM (difference of consecutive
# indptr offsets).
interactions_per_user = np.ediff1d(URM.indptr)
cold_user_mask = interactions_per_user == 0
user_mask = interactions_per_user < 3
#print(len(user_mask[user_mask == True]))
#exit()

# Move every target user whose row has fewer than 3 but more than 0 stored
# entries from targetUsers into cold_users, in ascending row order.
cold_users = []
for user_id in np.flatnonzero(user_mask & ~cold_user_mask):
    user_id = int(user_id)
    if user_id in targetUsers:
        cold_users.append(user_id)
        targetUsers.remove(user_id)

# Write the selected user ids, one per line, below a header row.
with open("../target_users_lukewarm.csv", "w") as f:
    f.write("user_id\n")
    f.writelines(str(user_id) + "\n" for user_id in cold_users)
'''''
with open("target_users_other.csv", 'w') as f:
    f.write("user_id\n")
    for user_id in targetUsers:
        f.write(str(user_id) + "\n")