예제 #1
0
def pcc_item_rating_pred(path, rating, method, k):
    """Predict one rating per record of ``path`` using item-based KNN.

    The dense matrix from ``get_matrix(3)`` is indexed as
    ``mtx[item, user]``.  It is normalised, turned into an item similarity
    table by ``dot_sim`` or ``cos_sim``, and each record is scored from the
    ``k`` nearest items.  The predictions are handed to ``write``; the
    elapsed time and the RMSE against ``golden()`` are printed.

    Args:
        path: Forwarded to ``extract_data``.  Each record it yields is read
            as ``record[0]`` = item id and ``record[1]`` = user id.
        rating: ``'mean'`` divides the summed neighbour entries by ``k``;
            ``'weighted'`` weights them by similarity.  Any other value
            leaves every score at 0.
        method: ``'dot'`` or ``'cos'``.  Any other value leaves the
            similarity table as an empty list.
        k: Number of neighbours used per prediction.

    Returns:
        None.
    """
    started = time.time()
    name = 'pcc_item'
    records = extract_data(path)
    mtx = get_matrix(3).toarray()

    # Columns with no non-zero entry receive a tiny value; the original note
    # says this is meant to prevent division by zero.
    empty_cols = np.where(~mtx.any(axis=0))[0]
    mtx[:, [empty_cols]] = 0.00001

    # Normalise: subtract the row sums, divide by the number of rows, then
    # divide by the row norms of the matrix as it was before this step.
    # NOTE(review): this is (x - sum) / n, not x - sum / n -- confirm the
    # intended centring formula.
    row_sums = np.sum(mtx, axis=1)
    row_norms = np.linalg.norm(mtx, axis=1)
    centred = (mtx.T - row_sums) / len(mtx)
    centred /= row_norms.T
    mtx = centred.T

    item_mtx = []
    if method == 'dot':
        item_mtx = dot_sim(mtx, name)
    elif method == 'cos':
        # NOTE(review): rows are multiplied by their norms here, not
        # divided -- confirm this is what cos_sim expects.
        rescaled = (mtx.T * np.linalg.norm(mtx, axis=1)).T
        item_mtx = cos_sim(rescaled, name)

    result = []
    for record in records:
        item_id, user_id = record[0], record[1]
        sims = item_mtx[item_id]  # similarity row of the queried item

        # Take the k + 1 best candidates so that one can be discarded: the
        # queried item itself when present, otherwise the last candidate.
        ranked = np.argsort(sims, kind='heapsort')[::-1]
        knn = ranked[:k + 1]
        if item_id in knn:
            drop = np.where(knn == item_id)
        else:
            drop = len(knn) - 1
        knn = np.delete(knn, drop)

        # The constant 3 is added back to every prediction; presumably it
        # matches the offset passed to get_matrix(3) -- TODO confirm.
        score = 0
        if rating == 'mean':
            picked = np.take(mtx[:, user_id], knn.tolist())
            score = np.sum(picked) / float(k) + 3
        elif rating == 'weighted':
            knn_sim = sims[knn]
            sim_total = np.sum(knn_sim)
            if sim_total != 0:  # avoid dividing by a zero similarity sum
                weight = knn_sim / sim_total
                picked = np.take(mtx[:, user_id], knn.tolist())
                score = np.sum(np.multiply(picked, weight)) + 3
            else:
                # Fall back to the user's column sum over its non-zero count.
                user_col = mtx[:, user_id]
                score = np.sum(user_col) / np.size(np.nonzero(user_col)) + 3
        result.append(score)

    write(result, name, rating, method, k)
    elapsed = time.time() - started
    print('item_rating_pred {} {} {} time : {}'.format(method, rating, k,
                                                       elapsed))
    gold = golden()
    rmse = np.sqrt(np.mean(np.square(result - gold)))
    print("RMSE :", rmse)
def user_rating_pred(path, rating, method, k):
    """Predict one rating per record of ``path`` using user-based KNN.

    The dense matrix from ``get_matrix(3)`` is indexed as
    ``mtx[movie, user]``.  A user similarity table is built with ``dot_sim``
    or ``cos_sim`` and each record is scored from the ``k`` nearest users.
    The predictions are handed to ``write``; the elapsed time and the RMSE
    against ``golden()`` are printed.

    Args:
        path: Forwarded to ``extract_data``.  Each record it yields is read
            as ``record[0]`` = movie id and ``record[1]`` = user id.
        rating: ``'mean'`` divides the summed neighbour entries by ``k``;
            ``'weighted'`` weights them by similarity.  Any other value
            leaves every score at 0.
        method: ``'dot'`` or ``'cos'``.  Any other value leaves the
            similarity table as an empty list.
        k: Number of neighbours used per prediction.

    Returns:
        None.
    """
    t0 = time.time()
    name = 'user'
    records = extract_data(path)
    mtx = get_matrix(3).toarray()

    # Columns with no non-zero entry receive a tiny value; the original note
    # says this is meant to prevent division by zero.
    blank_cols = np.where(~mtx.any(axis=0))[0]
    mtx[:, [blank_cols]] = 0.00001

    user_mtx = []
    if method == 'dot':
        user_mtx = dot_sim(mtx, name)
    elif method == 'cos':
        # The matrix is scaled by its per-column norms before cos_sim.
        # NOTE(review): this multiplies by the norms rather than dividing --
        # confirm this is the scaling cos_sim expects.
        scaled = np.linalg.norm(mtx, axis=0) * mtx
        user_mtx = cos_sim(scaled, name)

    result = []
    for record in records:
        mv_id = record[0]
        user_id = record[1]
        sims = user_mtx[user_id]  # similarity row of the queried user

        # Take the k + 1 best candidates so that one can be discarded: the
        # queried user when present, otherwise the last candidate.
        candidates = np.argsort(sims, kind='heapsort')[::-1][:k + 1]
        if user_id in candidates:
            discard = np.where(candidates == user_id)
        else:
            discard = len(candidates) - 1
        knn = np.delete(candidates, discard)

        # The constant 3 is added back to every prediction; presumably it
        # matches the offset passed to get_matrix(3) -- TODO confirm.
        score = 0
        if rating == 'mean':
            neighbour_vals = np.take(mtx[mv_id, :], knn.tolist())
            score = (np.sum(neighbour_vals) / float(k)) + 3
        elif rating == 'weighted':
            knn_sim = sims[knn]
            sim_sum = np.sum(knn_sim)
            if sim_sum != 0:  # avoid dividing by a zero similarity sum
                weight = knn_sim / sim_sum
                neighbour_vals = np.take(mtx[mv_id, :], knn.tolist())
                score = np.sum(np.multiply(neighbour_vals, weight)) + 3
            else:
                # Fall back to the movie's row sum over its non-zero count.
                movie_row = mtx[mv_id, :]
                score = np.sum(movie_row) / np.size(np.nonzero(movie_row)) + 3
        result.append(score)

    write(result, name, rating, method, k)
    elapsed = time.time() - t0
    print('user_rating_pred {} {} {} time : {}'.format(method, rating, k, elapsed))
    gold = golden()
    rmse = np.sqrt(np.mean(np.square(result - gold)))
    print("RMSE :", rmse)
예제 #3
0
#!/usr/bin/env python
# coding: utf-8

# In[2]:

from myutils import get_matrix, extract_data
import numpy as np
import torch
from copy import deepcopy
import time

# In[3]:

# Parse data/dev.csv with the project helper imported from myutils above.
data = extract_data('data/dev.csv')

# In[4]:

import csv

# Read every field of data/dev.golden into one flat float array.
# The with-block closes the file as soon as parsing finishes (the handle was
# previously left open for the life of the process); newline='' is the
# setting the csv module documents for files handed to csv.reader.
with open('data/dev.golden', 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f)
    golden = []
    for row in reader:
        # A row may hold several fields; all of them are appended in order.
        golden.extend(row)
golden = np.array(golden, dtype=float)

# In[5]:


def get_score(U, V, data):
    u = U.numpy().take(data.take(1, axis=1), axis=0)