class RRCP(BaseEstimator):
    """Pairwise-ranking recommender mixing a rating estimate with a price term.

    The score of item ``i`` for user ``u`` is::

        alpha[u] * mf_rate[u, i] + beta[u] * exp(prob(u, i)) + gamma[u]

    where ``mf_rate`` is the matrix rebuilt from the factors returned by
    ``isvd`` and ``prob`` is the value returned by
    ``MyGaussian.get_prob_from_gaussian`` for the item's price.

    Parameters
    ----------
    n : int
        Number of items returned by :meth:`recommend`.
    max_iter : int
        Upper bound handed to :meth:`sample`; it caps both the number of
        purchased items used and the number of training pairs drawn per user.
    lambda_iter : float
        Initial step size; it is multiplied by 0.9 after every update and
        reset for each user.
    lambda_regular : float
        Weight of the regularisation term in :meth:`update`.
    """

    # Category key handed to the price helpers (the only one used here).
    _CATEGORY = 'phones'

    def __init__(self, n=5, max_iter=20, lambda_iter=0.03,
                 lambda_regular=0.03):
        self.n = n
        self.max_iter = max_iter
        self.lambda_iter = lambda_iter
        self.lambda_regular = lambda_regular
        # Per-user (bought, non-bought) pairs used by target_value();
        # filled lazily by sample_target().
        self.target_sample = []

    def model_score_without_log(self, alpha, beta, gamma, rate, price):
        """Return ``alpha * rate + beta * price**2 + gamma``."""
        return alpha * rate + beta * price**2 + gamma

    def _price_prob(self, user_ix, item_ix):
        """Return the Gaussian price value for one (user, item) index pair."""
        return MyGaussian.get_prob_from_gaussian(
            self.dataModel.getUserByUid(user_ix), self._CATEGORY,
            self.personal_item_price[user_ix][item_ix],
            self.param_mu, self.param_sigm)

    def sample_target(self):
        """Rebuild ``self.target_sample`` with one pair list per user.

        Every purchased item of a user is paired with a distinct randomly
        chosen non-purchased item. When a user has fewer non-purchased
        items than purchases, only as many pairs as possible are produced
        instead of raising ``ValueError``.
        """
        all_items = range(self.dataModel.getItemsNum())
        per_user = []
        for user_ix in range(self.dataModel.getUsersNum()):
            bought = list(self.dataModel.getData()[user_ix].nonzero()[1])
            bought_set = set(bought)
            candidates = [x for x in all_items if x not in bought_set]
            count = min(len(bought), len(candidates))
            non_bought = random.sample(candidates, count)
            # Materialise the pairs so they can be iterated more than once.
            per_user.append(list(zip(bought, non_bought)))
        self.target_sample = per_user

    def target_value(self, user_ix):
        """Return the evaluation objective for one user.

        The value is the sum of ``score(bought) - score(non_bought)`` over
        the user's cached pairs plus ``alpha**2 + beta**2``. The pairs are
        drawn on first use.
        """
        if not self.target_sample:
            self.sample_target()
        result_value = 0.0
        result_value += sum(
            self.model_score(user_ix, i) - self.model_score(user_ix, j)
            for i, j in self.target_sample[user_ix])
        result_value += self.alpha[user_ix]**2 + self.beta[user_ix]**2
        return result_value

    def model_score(self, user_ix, item_ix):
        """Return the model score of item ``item_ix`` for user ``user_ix``."""
        rate = self.mf_rate[user_ix, item_ix]
        prob = self._price_prob(user_ix, item_ix)
        return (self.alpha[user_ix] * rate
                + self.beta[user_ix] * np.exp(prob)
                + self.gamma[user_ix])

    def update(self, user_ix, item_i, item_j):
        """Return updated ``(alpha, beta)`` for one training pair.

        ``item_i`` is the purchased item and ``item_j`` the non-purchased
        one. Both coefficients move along the difference of their feature
        between the two items (rating for alpha, ``exp(prob)`` for beta).
        The caller stores the returned values; nothing is mutated here.
        """
        alpha = self.alpha[user_ix]
        beta = self.beta[user_ix]
        rate_diff = (self.mf_rate[user_ix][item_i]
                     - self.mf_rate[user_ix][item_j])
        prob_diff = (np.exp(self._price_prob(user_ix, item_i))
                     - np.exp(self._price_prob(user_ix, item_j)))
        # NOTE(review): the regularisation term is quadratic in the
        # coefficient and is not scaled by lambda_iter, whereas the gradient
        # of lambda * x**2 would be 2 * lambda * x. Kept unchanged so the
        # trained model stays the same -- confirm the intent.
        new_alpha = (alpha + self.lambda_iter * rate_diff
                     - 2 * self.lambda_regular * alpha**2)
        new_beta = (beta + self.lambda_iter * prob_diff
                    - 2 * self.lambda_regular * beta**2)
        return new_alpha, new_beta

    def sample(self, user_ix, max_iter):
        """Draw shuffled (bought, non-bought) training pairs for one user.

        At most ``max_iter`` purchased items are used. Passes over them
        continue until ``max_iter`` pairs exist or the item budget is used
        up, and a random 70% (rounded up) of the pairs is returned.

        Returns an empty list when the user has no purchases or has bought
        every item, since no valid pair exists in either case.
        """
        item_num = self.dataModel.getItemsNum()
        purchased = list(self.dataModel.getData()[user_ix].nonzero()[1])
        # Negatives are checked against every purchase, not only the
        # truncated list, so a bought item is never used as a negative.
        purchased_set = set(purchased)
        if not purchased or len(purchased_set) >= item_num:
            return []

        random.shuffle(purchased)
        bought = purchased[:max_iter]
        result = []
        seen = set()
        while (len(bought) + len(result) < item_num
               and len(result) < max_iter):
            for bought_item in bought:
                # Starting from the bought item forces at least one draw.
                non_bought_item = bought_item
                while (non_bought_item in purchased_set
                       or (bought_item, non_bought_item) in seen):
                    non_bought_item = np.random.randint(item_num)
                pair = (bought_item, non_bought_item)
                seen.add(pair)
                result.append(pair)
        random.shuffle(result)
        return result[:int(math.ceil(len(result) * 0.7))]

    def fit(self, trainSamples, trainTargets):
        """Fit the per-user coefficients and return ``self``.

        ``trainSamples`` must be convertible to a three-column frame of
        (user, item, price); ``trainTargets`` is passed to the data model.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        # Pairs cached for a previous training set no longer apply.
        self.target_sample = []
        users_num = self.dataModel.getUsersNum()
        items_num = self.dataModel.getItemsNum()

        # Rebuild the interaction matrix from its factors, blank out
        # entries below 1 and factorise again.
        data = np.array(self.dataModel.getData().todense())
        u, s, v = isvd(data)
        new_data = np.dot(u, np.dot(s, v))
        new_data[new_data < 1] = np.nan
        u, s, v = isvd(new_data)
        self.mf_rate = np.dot(u, np.dot(s, v))

        train_data = pd.DataFrame(trainSamples,
                                  columns=['user', 'item', 'price'])
        # Later rows win when an item appears with several prices.
        item_price = dict(zip(train_data['item'], train_data['price']))
        prices = [item_price[self.dataModel.getItemByIid(item_ix)]
                  for item_ix in range(items_num)]
        # Every user row holds the raw item prices. A price relative to
        # the user's average was computed here before but never stored,
        # so that dead computation has been dropped.
        self.personal_item_price = np.empty((users_num, items_num))
        self.personal_item_price[:] = prices

        price_df = Construction.get_user_category_buy_price(
            {self._CATEGORY: train_data})
        self.param_mu, self.param_sigm = MyGaussian.gaussian_curve_fit(
            price_df)

        self.alpha = np.random.rand(users_num)
        self.beta = np.random.rand(users_num)
        self.gamma = np.zeros(users_num)

        origin_lambda_iter = self.lambda_iter
        try:
            for user_ix in range(users_num):
                # Each user starts again from the configured step size.
                self.lambda_iter = origin_lambda_iter
                for item_1, item_2 in self.sample(user_ix, self.max_iter):
                    new_alpha, new_beta = self.update(user_ix, item_1, item_2)
                    self.alpha[user_ix] = new_alpha
                    self.beta[user_ix] = new_beta
                    self.lambda_iter = self.lambda_iter * 0.9
        finally:
            # Restore the hyper-parameter even when training fails.
            self.lambda_iter = origin_lambda_iter

        # NOTE(review): the learned beta is discarded here, so the price
        # term has no effect on scores. Kept unchanged -- confirm whether
        # this is a leftover experiment.
        self.beta = np.zeros(users_num)
        return self

    def recommend(self, u):
        """Return the ``n`` best-scoring items for user ``u``, best first."""
        # NOTE(review): score() treats a uid of -1 as an unknown user and
        # skips it. No such guard exists here, so -1 would index the last
        # row.
        uid = self.dataModel.getUidByUser(u)
        predict_scores = np.array(
            [self.model_score(uid, i)
             for i in range(self.dataModel.getItemsNum())])
        topN = np.argsort(predict_scores)[::-1][:self.n]
        return [self.dataModel.getItemByIid(i) for i in topN]

    def score(self, testSamples, trueLabels=None):
        """Print and return the F1 of the recommendations on ``testSamples``.

        Column 0 of ``testSamples`` is read as the user and column 1 as the
        item. Users for which the data model returns a uid of -1 are
        skipped. ``trueLabels`` is unused.
        """
        samples = np.array(testSamples)
        trueList = []
        recommendList = []
        for u in list(set(samples[:, 0])):
            if self.dataModel.getUidByUser(u) == -1:
                continue
            trueList.append(list(samples[samples[:, 0] == u][:, 1]))
            recommendList.append(self.recommend(u))
        result = Eval().evalAll(trueList, recommendList)
        print('RRCP result:' + '(' + str(self.get_params()) + '):\t'
              + str(result['F1']))
        return result['F1']
class RRCP(BaseEstimator):
    """Pairwise-ranking recommender mixing a rating estimate with a price term.

    The score of item ``i`` for user ``u`` is::

        alpha[u] * mf_rate[u, i] + beta[u] * exp(prob(u, i)) + gamma[u]

    where ``mf_rate`` is the matrix rebuilt from the factors returned by
    ``isvd`` and ``prob`` is the value returned by
    ``MyGaussian.get_prob_from_gaussian`` for the item's price.

    Parameters
    ----------
    n : int
        Number of items returned by :meth:`recommend`.
    max_iter : int
        Upper bound handed to :meth:`sample`; it caps both the number of
        purchased items used and the number of training pairs drawn per user.
    lambda_iter : float
        Initial step size; it is multiplied by 0.9 after every update and
        reset for each user.
    lambda_regular : float
        Weight of the regularisation term in :meth:`update`.
    """

    # Category key handed to the price helpers (the only one used here).
    _CATEGORY = 'phones'

    def __init__(self, n=5, max_iter=20, lambda_iter=0.03,
                 lambda_regular=0.03):
        self.n = n
        self.max_iter = max_iter
        self.lambda_iter = lambda_iter
        self.lambda_regular = lambda_regular
        # Per-user (bought, non-bought) pairs used by target_value();
        # filled lazily by sample_target().
        self.target_sample = []

    def model_score_without_log(self, alpha, beta, gamma, rate, price):
        """Return ``alpha * rate + beta * price**2 + gamma``."""
        return alpha * rate + beta * price**2 + gamma

    def _price_prob(self, user_ix, item_ix):
        """Return the Gaussian price value for one (user, item) index pair."""
        return MyGaussian.get_prob_from_gaussian(
            self.dataModel.getUserByUid(user_ix), self._CATEGORY,
            self.personal_item_price[user_ix][item_ix],
            self.param_mu, self.param_sigm)

    def sample_target(self):
        """Rebuild ``self.target_sample`` with one pair list per user.

        Every purchased item of a user is paired with a distinct randomly
        chosen non-purchased item. When a user has fewer non-purchased
        items than purchases, only as many pairs as possible are produced
        instead of raising ``ValueError``.
        """
        all_items = range(self.dataModel.getItemsNum())
        per_user = []
        for user_ix in range(self.dataModel.getUsersNum()):
            bought = list(self.dataModel.getData()[user_ix].nonzero()[1])
            bought_set = set(bought)
            candidates = [x for x in all_items if x not in bought_set]
            count = min(len(bought), len(candidates))
            non_bought = random.sample(candidates, count)
            # Materialise the pairs so they can be iterated more than once.
            per_user.append(list(zip(bought, non_bought)))
        self.target_sample = per_user

    def target_value(self, user_ix):
        """Return the evaluation objective for one user.

        The value is the sum of ``score(bought) - score(non_bought)`` over
        the user's cached pairs plus ``alpha**2 + beta**2``. The pairs are
        drawn on first use.
        """
        if not self.target_sample:
            self.sample_target()
        result_value = 0.0
        result_value += sum(
            self.model_score(user_ix, i) - self.model_score(user_ix, j)
            for i, j in self.target_sample[user_ix])
        result_value += self.alpha[user_ix]**2 + self.beta[user_ix]**2
        return result_value

    def model_score(self, user_ix, item_ix):
        """Return the model score of item ``item_ix`` for user ``user_ix``."""
        rate = self.mf_rate[user_ix, item_ix]
        prob = self._price_prob(user_ix, item_ix)
        return (self.alpha[user_ix] * rate
                + self.beta[user_ix] * np.exp(prob)
                + self.gamma[user_ix])

    def update(self, user_ix, item_i, item_j):
        """Return updated ``(alpha, beta)`` for one training pair.

        ``item_i`` is the purchased item and ``item_j`` the non-purchased
        one. Both coefficients move along the difference of their feature
        between the two items (rating for alpha, ``exp(prob)`` for beta).
        The caller stores the returned values; nothing is mutated here.
        """
        alpha = self.alpha[user_ix]
        beta = self.beta[user_ix]
        rate_diff = (self.mf_rate[user_ix][item_i]
                     - self.mf_rate[user_ix][item_j])
        prob_diff = (np.exp(self._price_prob(user_ix, item_i))
                     - np.exp(self._price_prob(user_ix, item_j)))
        # NOTE(review): the regularisation term is quadratic in the
        # coefficient and is not scaled by lambda_iter, whereas the gradient
        # of lambda * x**2 would be 2 * lambda * x. Kept unchanged so the
        # trained model stays the same -- confirm the intent.
        new_alpha = (alpha + self.lambda_iter * rate_diff
                     - 2 * self.lambda_regular * alpha**2)
        new_beta = (beta + self.lambda_iter * prob_diff
                    - 2 * self.lambda_regular * beta**2)
        return new_alpha, new_beta

    def sample(self, user_ix, max_iter):
        """Draw shuffled (bought, non-bought) training pairs for one user.

        At most ``max_iter`` purchased items are used. Passes over them
        continue until ``max_iter`` pairs exist or the item budget is used
        up, and a random 70% (rounded up) of the pairs is returned.

        Returns an empty list when the user has no purchases or has bought
        every item, since no valid pair exists in either case.
        """
        item_num = self.dataModel.getItemsNum()
        purchased = list(self.dataModel.getData()[user_ix].nonzero()[1])
        # Negatives are checked against every purchase, not only the
        # truncated list, so a bought item is never used as a negative.
        purchased_set = set(purchased)
        if not purchased or len(purchased_set) >= item_num:
            return []

        random.shuffle(purchased)
        bought = purchased[:max_iter]
        result = []
        seen = set()
        while (len(bought) + len(result) < item_num
               and len(result) < max_iter):
            for bought_item in bought:
                # Starting from the bought item forces at least one draw.
                non_bought_item = bought_item
                while (non_bought_item in purchased_set
                       or (bought_item, non_bought_item) in seen):
                    non_bought_item = np.random.randint(item_num)
                pair = (bought_item, non_bought_item)
                seen.add(pair)
                result.append(pair)
        random.shuffle(result)
        return result[:int(math.ceil(len(result) * 0.7))]

    def fit(self, trainSamples, trainTargets):
        """Fit the per-user coefficients and return ``self``.

        ``trainSamples`` must be convertible to a three-column frame of
        (user, item, price); ``trainTargets`` is passed to the data model.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        # Pairs cached for a previous training set no longer apply.
        self.target_sample = []
        users_num = self.dataModel.getUsersNum()
        items_num = self.dataModel.getItemsNum()

        # Rebuild the interaction matrix from its factors, blank out
        # entries below 1 and factorise again.
        data = np.array(self.dataModel.getData().todense())
        u, s, v = isvd(data)
        new_data = np.dot(u, np.dot(s, v))
        new_data[new_data < 1] = np.nan
        u, s, v = isvd(new_data)
        self.mf_rate = np.dot(u, np.dot(s, v))

        train_data = pd.DataFrame(trainSamples,
                                  columns=['user', 'item', 'price'])
        # Later rows win when an item appears with several prices.
        item_price = dict(zip(train_data['item'], train_data['price']))
        prices = [item_price[self.dataModel.getItemByIid(item_ix)]
                  for item_ix in range(items_num)]
        # Every user row holds the raw item prices. A price relative to
        # the user's average was computed here before but never stored,
        # so that dead computation has been dropped.
        self.personal_item_price = np.empty((users_num, items_num))
        self.personal_item_price[:] = prices

        price_df = Construction.get_user_category_buy_price(
            {self._CATEGORY: train_data})
        self.param_mu, self.param_sigm = MyGaussian.gaussian_curve_fit(
            price_df)

        self.alpha = np.random.rand(users_num)
        self.beta = np.random.rand(users_num)
        self.gamma = np.zeros(users_num)

        origin_lambda_iter = self.lambda_iter
        try:
            for user_ix in range(users_num):
                # Each user starts again from the configured step size.
                self.lambda_iter = origin_lambda_iter
                for item_1, item_2 in self.sample(user_ix, self.max_iter):
                    new_alpha, new_beta = self.update(user_ix, item_1, item_2)
                    self.alpha[user_ix] = new_alpha
                    self.beta[user_ix] = new_beta
                    self.lambda_iter = self.lambda_iter * 0.9
        finally:
            # Restore the hyper-parameter even when training fails.
            self.lambda_iter = origin_lambda_iter

        # NOTE(review): the learned beta is discarded here, so the price
        # term has no effect on scores. Kept unchanged -- confirm whether
        # this is a leftover experiment.
        self.beta = np.zeros(users_num)
        return self

    def recommend(self, u):
        """Return the ``n`` best-scoring items for user ``u``, best first."""
        # NOTE(review): score() treats a uid of -1 as an unknown user and
        # skips it. No such guard exists here, so -1 would index the last
        # row.
        uid = self.dataModel.getUidByUser(u)
        predict_scores = np.array(
            [self.model_score(uid, i)
             for i in range(self.dataModel.getItemsNum())])
        topN = np.argsort(predict_scores)[::-1][:self.n]
        return [self.dataModel.getItemByIid(i) for i in topN]

    def score(self, testSamples, trueLabels=None):
        """Print and return the F1 of the recommendations on ``testSamples``.

        Column 0 of ``testSamples`` is read as the user and column 1 as the
        item. Users for which the data model returns a uid of -1 are
        skipped. ``trueLabels`` is unused.
        """
        samples = np.array(testSamples)
        trueList = []
        recommendList = []
        for u in list(set(samples[:, 0])):
            if self.dataModel.getUidByUser(u) == -1:
                continue
            trueList.append(list(samples[samples[:, 0] == u][:, 1]))
            recommendList.append(self.recommend(u))
        result = Eval().evalAll(trueList, recommendList)
        print('RRCP result:' + '(' + str(self.get_params()) + '):\t'
              + str(result['F1']))
        return result['F1']