def predict(otrain):
    """Predict ratings from the ratings of each user's nearest neighbors.

    Entries of ``otrain`` greater than zero are treated as observed ratings.
    Rows are compared by the correlation distance between their
    rated/not-rated patterns; the rating values themselves do not enter the
    distance.  For every (row, column) cell the prediction is the mean
    normalized rating of the closest ``len // 2 + 1`` other rows that have an
    observed rating in that column.  Cells are overwritten whether or not the
    row already had a rating there; cells for which no other row has a rating
    keep their normalized value.

    Parameters
    ----------
    otrain : 2-D array
        Rating matrix; rows are users (columns presumably items -- confirm
        against the caller).

    Returns
    -------
    The filled matrix after ``NormalizePositive.inverse_transform``.
    """
    binary = (otrain > 0)
    norm = NormalizePositive(axis=1)
    train = norm.fit_transform(otrain)

    dists = distance.squareform(distance.pdist(binary, 'correlation'))
    # Row u of `neighbors` lists row indices from nearest to farthest.
    neighbors = dists.argsort(axis=1)

    filled = train.copy()
    for u in range(filled.shape[0]):
        # n_u are the neighbors of user u.  Remove u explicitly rather than
        # slicing off position 0: a user with an identical rating pattern is
        # also at distance ~0 and argsort's default sort is not stable, so u
        # is not guaranteed to come first.
        ranked = neighbors[u]
        n_u = ranked[ranked != u]
        for m in range(filled.shape[1]):
            # This could be faster with numpy indexing tricks, at the cost of
            # readability.
            revs = [train[neigh, m] for neigh in n_u if binary[neigh, m]]
            if revs:
                # Average only the closest half (rounded up) of the raters.
                n = len(revs) // 2 + 1
                filled[u, m] = np.mean(revs[:n])
    return norm.inverse_transform(filled)
def predict(otrain):
    """Fill a rating matrix using a nearest-neighbor average.

    Values of ``otrain`` above zero count as observed ratings.  Similarity
    between rows is the correlation distance of their boolean "has rated"
    vectors.  Each cell ``(u, m)`` is replaced by the mean normalized rating
    that the nearest ``len // 2 + 1`` other rows gave column ``m``; if no
    other row rated column ``m`` the normalized input value is kept.

    Parameters
    ----------
    otrain : 2-D array
        Rating matrix, one row per user (column meaning is not visible here;
        presumably one column per item).

    Returns
    -------
    The predictions mapped back to the original scale via
    ``norm.inverse_transform``.
    """
    binary = (otrain > 0)
    norm = NormalizePositive(axis=1)
    train = norm.fit_transform(otrain)

    dists = distance.pdist(binary, 'correlation')
    dists = distance.squareform(dists)

    # Each row holds the indices of all rows ordered by increasing distance.
    neighbors = dists.argsort(axis=1)
    filled = train.copy()
    n_users, n_items = filled.shape
    for u in range(n_users):
        # n_u are the neighbors of user u, nearest first.  Filter u out by
        # value: with tied (zero) distances the user itself is not guaranteed
        # to occupy position 0 of the argsort result.
        order = neighbors[u]
        n_u = order[order != u]
        for m in range(n_items):
            # Ratings of column m from neighbors that actually rated it.
            revs = [train[neigh, m] for neigh in n_u if binary[neigh, m]]
            if not revs:
                continue
            keep = len(revs) // 2 + 1
            filled[u, m] = np.mean(revs[:keep])

    return norm.inverse_transform(filled)
def predict(otrain):
    """Estimate ratings as the mean rating of each user's closest neighbors.

    A rating is considered present where ``otrain > 0``.  Rows are ranked
    against each other by correlation distance on that presence mask.  For
    each cell, the neighbors that rated the column are collected in order of
    increasing distance and the first ``len // 2 + 1`` of their normalized
    ratings are averaged.  Cells with no rating from any other row are left
    at their normalized input value.

    Parameters
    ----------
    otrain : 2-D array
        Rating matrix with one row per user.

    Returns
    -------
    Result of ``norm.inverse_transform`` applied to the filled matrix.
    """
    binary = (otrain > 0)
    norm = NormalizePositive(axis=1)
    train = norm.fit_transform(otrain)

    dists = distance.pdist(binary, 'correlation')
    dists = distance.squareform(dists)

    neighbors = dists.argsort(axis=1)
    filled = train.copy()
    for u in range(filled.shape[0]):
        # n_u are the user's neighbors.  The user is excluded by index, not
        # by position: a row with the same rating pattern ties at distance
        # ~0, so the user itself may not be the first entry after sorting.
        candidates = neighbors[u]
        n_u = candidates[candidates != u]
        for m in range(filled.shape[1]):
            # This could be sped up with numpy indexing,
            # but it would be harder to understand.
            revs = [train[neigh, m] for neigh in n_u if binary[neigh, m]]
            if revs:
                half = len(revs) // 2 + 1
                filled[u, m] = np.mean(revs[:half])

    return norm.inverse_transform(filled)
def predict(otrain):
    """Predict every user's rating of every movie from similar users.

    Parameters
    ----------
    otrain : 2-D array
        Rating matrix, users in rows and movies in columns; entries greater
        than zero are treated as given ratings.

    Returns
    -------
    The predicted matrix after undoing the normalization with
    ``norm.inverse_transform``.  Cells for which no other user rated the
    movie keep their normalized input value.
    """
    # Mask of the entries that have been rated.
    binary = (otrain > 0)
    norm = NormalizePositive(axis=1)
    # Normalize the ratings.
    train = norm.fit_transform(otrain)

    # Similarity between users: only whether a movie was rated matters here,
    # not the score that was given.
    dists = distance.pdist(binary, 'correlation')
    dists = distance.squareform(dists)

    # Sort each row; row u then holds the user indices from nearest to
    # farthest (e.g. "2 0 1" means user 2 is closest, then user 0, ...).
    neighbors = dists.argsort(axis=1)
    filled = train.copy()
    for u in range(filled.shape[0]):
        # n_u are the neighbors of the u-th user.  u is removed explicitly:
        # slicing off position 0 is wrong when another user ties at distance
        # ~0 (same rated/not-rated pattern), because argsort's default sort
        # does not guarantee that u comes first among the ties.
        ranked = neighbors[u]
        n_u = ranked[ranked != u]
        # m is the movie.
        for m in range(filled.shape[1]):
            # This code could be faster using numpy indexing trickery, at the
            # cost of readability.
            # Collect the ratings of movie m from the neighbors who rated it.
            revs = [train[neigh, m] for neigh in n_u if binary[neigh, m]]
            # The mean of the nearest half (rounded up) of those ratings is
            # the prediction for user u on movie m.
            if revs:
                n = len(revs) // 2 + 1
                filled[u, m] = np.mean(revs[:n])
    return norm.inverse_transform(filled)
def predict(train):
    """Fill each user's missing ratings with a per-user ElasticNet regression.

    Entries of ``train`` greater than zero are treated as observed ratings.
    For every row ``u`` with more than five observed ratings, a model is fit
    in which each column rated by ``u`` is one sample: the features are the
    other rows' normalized values in that column and the target is ``u``'s
    normalized rating.  The model then predicts the columns ``u`` has not
    rated.  Rows with five or fewer ratings, and rows with nothing missing,
    are left at their normalized values.

    Parameters
    ----------
    train : 2-D array
        Rating matrix, one row per user.

    Returns
    -------
    The filled matrix after ``NormalizePositive.inverse_transform``.
    """
    binary = (train > 0)
    reg = ElasticNetCV(
        fit_intercept=True,
        alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    norm = NormalizePositive()
    train = norm.fit_transform(train)

    filled = train.copy()
    for u in range(filled.shape[0]):
        bu = binary[u]
        # Too few ratings to fit on, or nothing left to predict.  The second
        # test avoids handing predict() a zero-sample matrix, which
        # scikit-learn's input validation rejects.
        if np.sum(bu) <= 5 or np.all(bu):
            continue
        # Every user except u supplies the features.
        curtrain = np.delete(train, u, axis=0)
        reg.fit(curtrain[:, bu].T, train[u, bu])
        filled[u, ~bu] = reg.predict(curtrain[:, ~bu].T)
    return norm.inverse_transform(filled)
def predict(train):
    """Predict unrated entries with one ElasticNetCV fit per user.

    ``train > 0`` marks the observed ratings.  For a user with more than five
    of them, the columns that user rated are the training samples (features:
    every other user's normalized value for the column; target: this user's
    normalized rating) and the unrated columns are predicted from the fitted
    model.  Other users' rows are returned unchanged apart from the
    normalization round trip.

    Parameters
    ----------
    train : 2-D array
        Rating matrix with users in rows.

    Returns
    -------
    ``norm.inverse_transform`` of the matrix with the predictions filled in.
    """
    binary = (train > 0)
    reg = ElasticNetCV(
        fit_intercept=True,
        alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    norm = NormalizePositive()
    train = norm.fit_transform(train)

    filled = train.copy()
    # Iterate over all users.
    for u in range(train.shape[0]):
        bu = binary[u]
        missing = ~bu
        # Skip users with too few ratings, and users with no gaps: calling
        # predict() on an empty selection would raise instead of being a
        # no-op.
        if np.sum(bu) <= 5 or not np.any(missing):
            continue
        # Remove the current user for training.
        curtrain = np.delete(train, u, axis=0)
        reg.fit(curtrain[:, bu].T, train[u, bu])
        # Fill the values that were not there already.
        filled[u, missing] = reg.predict(curtrain[:, missing].T)

    return norm.inverse_transform(filled)
def predict(train):
    """Complete a rating matrix by regressing each user on all other users.

    Observed ratings are the entries with ``train > 0``.  Users with more
    than five observed ratings get an ElasticNetCV model trained on the
    columns they rated and applied to the columns they did not; everyone
    else keeps their normalized values.

    Parameters
    ----------
    train : 2-D array
        Rating matrix, one row per user.

    Returns
    -------
    The completed matrix, passed through ``norm.inverse_transform``.
    """
    binary = (train > 0)
    reg = ElasticNetCV(
        fit_intercept=True,
        alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    norm = NormalizePositive()
    train = norm.fit_transform(train)

    filled = train.copy()
    # Iterate over all users.
    for u in range(train.shape[0]):
        bu = binary[u]
        if np.sum(bu) <= 5:
            continue
        if np.all(bu):
            # Nothing to fill in; predict() would be given zero samples and
            # fail, so skip the user entirely.
            continue
        # Remove the current user from the training data.
        curtrain = np.delete(train, u, axis=0)
        reg.fit(curtrain[:, bu].T, train[u, bu])
        # Fill in the values that were not there before.
        filled[u, ~bu] = reg.predict(curtrain[:, ~bu].T)

    return norm.inverse_transform(filled)
def predict(train):
    """Predict the movies each user has not rated with a per-user ElasticNet.

    Parameters
    ----------
    train : 2-D array
        Rating matrix, users in rows and movies in columns; entries greater
        than zero are treated as given ratings.

    Returns
    -------
    The matrix with predictions written into the unrated cells of every user
    that has more than five ratings, after ``norm.inverse_transform``.
    """
    binary = (train > 0)
    reg = ElasticNetCV(
        fit_intercept=True,
        alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    norm = NormalizePositive()
    train = norm.fit_transform(train)

    filled = train.copy()
    # For each user u.
    for u in range(train.shape[0]):
        bu = binary[u]
        # Only users with more than 5 ratings are predicted.  Users who have
        # rated everything are skipped as well: there is nothing to fill and
        # predict() would raise on the resulting zero-sample input.
        if np.sum(bu) <= 5 or np.all(bu):
            continue
        # curtrain is the training set with user u removed.
        curtrain = np.delete(train, u, axis=0)
        # Inputs are the other users' ratings of the movies u has rated;
        # labels are u's actual ratings of those movies.
        reg.fit(curtrain[:, bu].T, train[u, bu])
        # Predict the movies that user u has not rated.
        filled[u, ~bu] = reg.predict(curtrain[:, ~bu].T)
    return norm.inverse_transform(filled)