import os
import pathlib
import tempfile
from copy import copy

import joblib
import numpy as np

# _log (module logger), util, TopN, UnratedItemCandidateSelector, and the
# default() model factory are assumed to be provided by the surrounding
# project / lenskit imports.


class Retrainer:
    """
    Exploit internal model structures and relationships to reduce retrain cost
    for the item-item k-NN search process.
    """

    def __init__(self, implicit):
        self.initial = default(implicit)
        self.initialized = False
        self.selector = UnratedItemCandidateSelector()

    def fit_initial(self, ratings):
        _log.info('fitting initial model %s', self.initial)
        self.initial.fit(ratings)
        # Persist the fitted similarity matrix to a scratch file so derived
        # copies can share it instead of refitting.
        fd, path = tempfile.mkstemp(prefix='lkpy-predict', suffix='.pkl',
                                    dir=util.scratch_dir(joblib=True))
        self.path = pathlib.Path(path)
        os.close(fd)
        del self.initial._sim_inv_
        _log.info('persisting initial model file to shared memory')
        joblib.dump(self.initial.sim_matrix_, path)
        self.initial.sim_matrix_ = joblib.load(path)
        self.selector.fit(ratings)
        self.initialized = True

    def instantiate(self, opts):
        # opts is a (neighborhood size, minimum similarity) pair.
        nnbrs, smin = opts
        model = copy(self.initial)
        _log.info('updating model to use %d sims', nnbrs)
        model.nnbrs = nnbrs
        # Drop similarities below the threshold instead of re-fitting.
        keep = model.sim_matrix_.values >= smin
        _log.info('trimming model to keep %d sims', np.sum(keep))
        model.sim_matrix_ = model.sim_matrix_.filter_nnzs(keep)
        model._sim_inv_ = model.sim_matrix_.transpose()
        return TopN(model, self.selector)
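# Usage sketch for Retrainer (a minimal sketch, not part of the original file:
# it assumes `ratings` is a user/item/rating DataFrame loaded elsewhere and that
# `implicit` is whatever the project's default() factory expects).
def build_topn_variants(ratings, options=((10, 0.05), (20, 0.0)), implicit=True):
    """Fit the expensive initial model once, then derive one TopN recommender per option."""
    retrainer = Retrainer(implicit)
    retrainer.fit_initial(ratings)
    # Each option is a (neighborhood size, minimum similarity) pair; instantiate()
    # trims the shared similarity matrix rather than refitting from scratch.
    return [retrainer.instantiate(opt) for opt in options]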
class NaiveBayesRecommender(Recommender):
    _count_tables = {}
    _item_features = None
    _nb_table = None
    _min_float = np.power(2.0, -149)

    def __init__(self, item_features=None, thresh=2.9, alpha=0.01, beta=0.01):
        self._count_tables = {}
        self._item_features = item_features
        self.selector = UnratedItemCandidateSelector()
        self._nb_table = NaiveBayesTable(thresh, alpha, beta)
        self.ensure_minimum_score(alpha)
        self.ensure_minimum_score(beta)

    # TODO: HOMEWORK 4
    def fit(self, ratings, *args, **kwargs):
        # Must fit the selector
        self.selector.fit(ratings)
        self._nb_table.reset()
        # For each rating:
        #   get the associated item features and update the NBTable
        for index, row in ratings.iterrows():
            user, rating, item = row['user'], row['rating'], row['item']
            features = self.get_features_list(item)
            self._nb_table.process_rating(user, rating, features)

    # TODO: HOMEWORK 4
    # Should return an ordered data frame with items and scores
    def recommend(self, user, n=None, candidates=None, ratings=None):
        # If n is None or zero, return a DataFrame with an empty item column
        if n is None or n == 0:
            return pd.DataFrame({'item': []})
        if candidates is None:
            candidates = self.selector.candidates(user, ratings)
        # Initialize scores
        scores = []
        # For each candidate
        for candidate in candidates:
            # Score the candidate for the user
            score = self.score_item(user, candidate)
            # Build list of (candidate, score) pairs
            scores.append([candidate, score])
        # Turn the result into a data frame
        scores = pd.DataFrame(scores, columns=['item', 'score'])
        # Retain the n largest-scoring rows (nlargest)
        scores = scores.nlargest(n, 'score')
        # Sort by score (sort_values)
        scores = scores.sort_values(by='score', ascending=False)
        # Return data frame
        return scores

    # TODO: HOMEWORK 4
    # Helper function to return the list of features for an item from the features data frame
    def get_features_list(self, item):
        if item not in self._count_tables:
            self._count_tables[item] = self._item_features[
                self._item_features.item == item]['feature']
        return self._count_tables[item]

    # TODO: HOMEWORK 4
    def score_item(self, user, item):
        # Get the features
        features = self.get_features_list(item)
        # Initialize the liked and not-liked scores with the base probability
        liked_scores = self._nb_table.user_prob(user, True)
        nliked_scores = self._nb_table.user_prob(user, False)
        # For each feature, update the scores by multiplying with the conditional probability
        for feature in features:
            liked_scores *= self._nb_table.user_feature_prob(user, feature, True)
            nliked_scores *= self._nb_table.user_feature_prob(user, feature, False)
        # Handle the case when scores go to zero.
        liked_scores = self.ensure_minimum_score(liked_scores)
        nliked_scores = self.ensure_minimum_score(nliked_scores)
        # Compute log-likelihood
        log_likelihood = np.log(liked_scores) - np.log(nliked_scores)
        # Handle zero again
        log_likelihood = self.ensure_minimum_score(log_likelihood)
        # Return result
        return log_likelihood

    # DO NOT ALTER
    def get_params(self, deep=True):
        return {
            'item_features': self._item_features,
            'thresh': self._nb_table.thresh,
            'alpha': self._nb_table.alpha,
            'beta': self._nb_table.beta
        }

    # DO NOT ALTER
    def ensure_minimum_score(self, val):
        if val == 0.0:
            return self._min_float
        else:
            return val
class NaiveBayesRecommender(Recommender):
    _count_tables = {}
    _item_features = None
    _nb_table = None
    _min_float = np.power(2.0, -149)

    def __init__(self, item_features=None, thresh=2.9, alpha=0.01, beta=0.01):
        self._item_features = item_features
        self.selector = UnratedItemCandidateSelector()
        self._nb_table = NaiveBayesTable(thresh, alpha, beta)

    # TODO: HOMEWORK 4
    def fit(self, ratings, *args, **kwargs):
        # Must fit the selector
        self.selector.fit(ratings)
        self._nb_table.reset()
        self._item_features.columns = ['item', 'feature']
        # For each rating
        for indexR, rowR in ratings.iterrows():
            user = rowR['user']
            item = rowR['item']
            rating = rowR['rating']
            # Get associated item features
            features = self.get_features_list(item)
            self._nb_table.process_rating(user, rating, features)

    # TODO: HOMEWORK 4
    # Should return an ordered data frame with items and scores
    def recommend(self, user, n=None, candidates=None, ratings=None):
        # If n is None or zero, return a DataFrame with an empty item column
        if n is None or n == 0:
            return pd.DataFrame({'item': []})
        if candidates is None:
            candidates = self.selector.candidates(user, ratings)
        # Score each candidate for the user
        scores = []
        for candidate in candidates:
            scores.append(self.score_item(user, candidate))
        # Turn the (item, score) pairs into a data frame
        df = pd.DataFrame({'item': candidates, 'score': scores},
                          columns=['item', 'score'])
        # Retain the n largest-scoring rows (nlargest)
        df = df.nlargest(n, 'score')
        # Sort by score (sort_values)
        df = df.sort_values(by=['score'], ascending=False)
        # Return data frame
        return df

    # TODO: HOMEWORK 4
    # Helper function to return the list of features for an item from the features data frame
    def get_features_list(self, item):
        features_list = []
        for indexF, rowF in self._item_features.loc[
                self._item_features['item'] == item].iterrows():
            features_list.append(rowF['feature'])
        return features_list

    # TODO: HOMEWORK 4
    def score_item(self, user, item):
        # Get the features
        features = self.get_features_list(item)
        # Initialize the liked and not-liked scores with the base probability
        baseP = self._nb_table.user_prob(user, True)
        baseNP = self._nb_table.user_prob(user, False)
        likeP = 1
        nlikeP = 1
        # For each feature, update the scores by multiplying with the conditional probability
        for feature in features:
            likeP = likeP * self._nb_table.user_feature_prob(user, feature, True)
            nlikeP = nlikeP * self._nb_table.user_feature_prob(user, feature, False)
        likeP = likeP * baseP
        nlikeP = nlikeP * baseNP
        try:
            ratio = likeP / nlikeP
        except ZeroDivisionError:
            # Handle the case when scores go to zero.
            return 0
        # Compute log-likelihood
        try:
            LL = math.log(ratio, math.e)
        except ValueError:
            # Handle zero again
            return 0
        # Return result
        return LL

    # DO NOT ALTER
    def get_params(self, deep=True):
        return {
            'item_features': self._item_features,
            'thresh': self._nb_table.thresh,
            'alpha': self._nb_table.alpha,
            'beta': self._nb_table.beta
        }

    # DO NOT ALTER
    def ensure_minimum_score(self, val):
        if val == 0.0:
            return self._min_float
        else:
            return val
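# Usage sketch for NaiveBayesRecommender (a minimal sketch, not part of the
# original file: it assumes `ratings` has user/item/rating columns and
# `item_features` has item/feature columns, both loaded elsewhere).
def recommend_for_users(ratings, item_features, users, n=10):
    """Fit the content-based naive Bayes recommender and build top-n lists."""
    algo = NaiveBayesRecommender(item_features=item_features, thresh=2.9,
                                 alpha=0.01, beta=0.01)
    algo.fit(ratings)
    # recommend() returns a DataFrame with 'item' and 'score' columns, sorted by score.
    return {user: algo.recommend(user, n=n) for user in users}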
class WeightedHybrid(Predictor):
    """
    Weighted hybrid that combines the predictions of several component algorithms.
    """
    # HOMEWORK 3 TODO: Follow the constructor for Fallback, which can be found at
    # https://github.com/lenskit/lkpy/blob/master/lenskit/algorithms/basic.py
    # Note that you will need to
    # -- Check for agreement between the set of weights and the number of algorithms supplied.
    # -- You should clone the algorithms with hwk3_util.my_clone() and store the cloned version.
    # -- You should normalize the weights so they sum to 1.
    # -- Keep the line that sets the `selector` function.

    algorithms = []
    weights = []

    def __init__(self, algorithms, weights):
        """
        Args:
            algorithms: a list of component algorithms. Each one will be trained.
            weights: weights for each component to combine predictions.
        """
        # HWK 3: Code here
        self.algorithms = algorithms
        # Normalize the weights so they sum to 1.
        self.weights = [w / sum(weights) for w in weights]
        self.selector = UnratedItemCandidateSelector()

    def clone(self):
        return WeightedHybrid(self.algorithms, self.weights)

    # HOMEWORK 3 TODO: Complete this implementation
    # Will be similar to Fallback. Must also call self.selector.fit()
    def fit(self, ratings, *args, **kwargs):
        self.selector.fit(ratings)
        # HWK 3: Code here
        for algo in self.algorithms:
            algo.fit(ratings, *args, **kwargs)
        return self

    def candidates(self, user, ratings):
        return self.selector.candidates(user, ratings)

    # HOMEWORK 3 TODO: Complete this implementation
    # Computes the weighted average of the predictions from the component algorithms
    def predict_for_user(self, user, items, ratings=None):
        preds = None
        # HWK 3: Code here
        for algo, weight in zip(self.algorithms, self.weights):
            aps = algo.predict_for_user(user, items, ratings=ratings)
            # Accumulate each component's predictions, scaled by its weight.
            preds = weight * aps if preds is None else preds + weight * aps
        return preds

    def __str__(self):
        return 'Weighted([{}])'.format(', '.join(str(a) for a in self.algorithms))
class WeightedHybrid(Predictor):
    """
    Weighted hybrid that combines the predictions of several component algorithms.
    """
    # HOMEWORK 3 TODO: Follow the constructor for Fallback, which can be found at
    # https://github.com/lenskit/lkpy/blob/master/lenskit/algorithms/basic.py
    # Note that you will need to
    # -- Check for agreement between the set of weights and the number of algorithms supplied.
    # -- You should clone the algorithms with hwk3_util.my_clone() and store the cloned version.
    # -- You should normalize the weights so they sum to 1.
    # -- Keep the line that sets the `selector` function.

    algorithms = []
    weights = []

    def __init__(self, algorithms, weights):
        """
        Args:
            algorithms: a list of component algorithms. Each one will be trained.
            weights: weights for each component to combine predictions.
        """
        # HWK 3: Code here
        if len(algorithms) != len(weights):
            raise ValueError('number of weights must match number of algorithms')
        self.algorithms = [my_clone(algo) for algo in algorithms]
        # Normalize the weights so they sum to 1.
        self.weights = [weight / sum(weights) for weight in weights]
        self.selector = UnratedItemCandidateSelector()

    def clone(self):
        return WeightedHybrid(self.algorithms, self.weights)

    # HOMEWORK 3 TODO: Complete this implementation
    # Will be similar to Fallback. Must also call self.selector.fit()
    def fit(self, ratings, *args, **kwargs):
        # HWK 3: Code here
        for algo in self.algorithms:
            algo.fit(ratings)
        self.selector.fit(ratings)
        return self

    def candidates(self, user, ratings):
        return self.selector.candidates(user, ratings)

    # HOMEWORK 3 TODO: Complete this implementation
    # Computes the weighted average of the predictions from the component algorithms
    def predict_for_user(self, user, items, ratings=None):
        # Accumulate weighted predictions in a Series indexed by item.
        preds = pd.Series(0.0, index=items)
        # HWK 3: Code here
        for i in range(len(self.algorithms)):
            algo_pred = self.algorithms[i].predict_for_user(user, items, ratings=ratings)
            preds = preds + self.weights[i] * algo_pred
        return preds

    def __str__(self):
        return 'Weighted([{}])'.format(', '.join(str(a) for a in self.algorithms))
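# Usage sketch for WeightedHybrid (a minimal sketch, not part of the original
# file: it assumes lenskit's Bias and ItemItem predictors as components, whose
# import paths may differ across lenskit versions, and a `ratings` DataFrame
# with user/item/rating columns loaded elsewhere).
from lenskit.algorithms.bias import Bias
from lenskit.algorithms.item_knn import ItemItem

def build_hybrid(ratings, bias_weight=0.3, knn_weight=0.7):
    """Blend a bias baseline with item-item CF; the constructor normalizes the weights."""
    hybrid = WeightedHybrid([Bias(), ItemItem(20)], [bias_weight, knn_weight])
    hybrid.fit(ratings)
    # hybrid.predict_for_user(user, items) then returns the weighted average
    # of the component predictions for those items.
    return hybrid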