def obtain_history_decay_factor(self, product_id):
    """ Produces a history decay factor for a product, as a function of the number
        of impressions recorded for it in `self.user_impressions_summary`.

        The function applied is chosen by `self.history_decay_function_name`
        ("linear", "rational", "exponential" or "step"). When no function is
        configured, or the configured name is not one of those, the neutral
        factor 1 is returned.

        :param product_id: The id of the product.
        :returns: The decay factor computed by the selected `df` function, or 1.
    """
    function_name = self.history_decay_function_name
    if function_name is None:
        return 1

    # The impressions count is the first element of the summary entry;
    # products absent from the summary (or a missing summary) count as zero.
    summary = self.user_impressions_summary
    if summary is None:
        impressions_count = 0
    else:
        impressions_count = summary.get(product_id, (0, None))[0]

    if function_name == "linear":
        return df.linear(impressions_count, self.history_decay_linear_function_ttl)
    if function_name == "rational":
        return df.rational(impressions_count)
    if function_name == "exponential":
        return df.exponential(impressions_count,
                              self.history_decay_exponential_function_halflife)
    if function_name == "step":
        return df.step(impressions_count, 1, -1, self.history_decay_step_function_ttl)

    # Unrecognized function name: no decay is applied.
    return 1
def obtain_product_age_decay_factor(self, product_date, present_date):
    """ Produces an age decay factor for a product, as a function of the number
        of whole days elapsed between the product date and the present date.

        The function applied is chosen by `self.product_age_decay_function_name`
        ("linear", "rational", "exponential" or "step"). When no function is
        configured, or the configured name is not one of those, the neutral
        factor 1 is returned.

        :param product_date: The date of the product. When None, the age handed
            to the decay function is None as well.
        :param present_date: The current date.
        :returns: The decay factor computed by the selected `df` function, or 1.
    """
    function_name = self.product_age_decay_function_name
    if function_name is None:
        return 1

    age_in_days = None
    if product_date is not None:
        # Best effort: attach UTC to the product date; if localizing fails
        # (presumably because the date is already timezone-aware -- confirm),
        # the date is used as given.
        try:
            reference_date = pytz.utc.localize(product_date)
        except Exception:
            reference_date = product_date
        age_in_days = (present_date - reference_date).days

    if function_name == "linear":
        return df.linear(age_in_days, self.product_age_decay_linear_function_ttl)
    if function_name == "rational":
        return df.rational(age_in_days)
    if function_name == "exponential":
        return df.exponential(age_in_days,
                              self.product_age_decay_exponential_function_halflife)
    if function_name == "step":
        return df.step(age_in_days, 1, -1, self.product_age_decay_step_function_ttl)

    # Unrecognized function name: no decay is applied.
    return 1
def merge_algorithm_contributions(self, sorted_scores_by_algorithm, n_recommendations):
    """ Merges the rankings produced by the individual algorithms into one ranking.

        See barbante.recommend.HybridRecommender.

        Works in two stages. First, a voting round: for each algorithm recipe
        configured for this recommender, the first `n_recommendations` entries
        of that algorithm's ranking receive a vote that shrinks with the ranking
        position and is multiplied by the algorithm weight; votes for the same
        product are summed. Second, the voted ranking is traversed and, every
        `self.bonus_period` positions, each algorithm whose recipe does not carry
        NO_BONUS_DIRECTIVE gets one entry of its own ranking included ahead of
        the voted product at that position.

        :param sorted_scores_by_algorithm: A map {algorithm: sequence of
            (score, product) pairs}; presumably sorted best-first -- confirm
            against the caller.
        :param n_recommendations: The maximum number of entries voted per
            algorithm, and the maximum size of the voted ranking.
        :returns: The list populated through `self._include_product`.
    """
    log.debug("Merging contributions...")

    # Voting stage.
    weighted_votes = {}
    vote_scale = max(1000, n_recommendations)  # scale of a single vote, never below 1000
    for recipe in self.session_context.algorithm_weights[self.get_suffix()]:
        alg, weight = recipe[0], recipe[1]
        algorithm_ranking = sorted_scores_by_algorithm.get(alg, {})
        for position, (_, product) in enumerate(algorithm_ranking):
            if position == n_recommendations:
                break
            vote = vote_scale * exponential(position, HALF_LIFE_FORMULA_1_SCORING)
            weighted_votes[product] = weighted_votes.get(product, 0) + vote * weight
        log.info("Algorithm [%s] ranked [%d] products" % (alg, len(algorithm_ranking)))

    # The votes are wrapped in a one-element list, which is what gets passed
    # along as the "score" of the voted products further below.
    candidates = [([votes], product) for product, votes in weighted_votes.items()]
    voted_ranking = heapq.nlargest(n_recommendations, candidates)

    # Bonus stage: periodic bonuses prevent a monopoly of the highest weighted
    # algorithm in the top standings.
    final_ranking = []
    ranked_products_set = set()
    recommendations_count = len(voted_ranking)
    for position, voted_entry in enumerate(voted_ranking):
        if position % self.bonus_period == 0:
            # On the k-th bonus round, each eligible algorithm forces in the
            # k-th entry of its own ranking.
            bonus_index = position // self.bonus_period
            for recipe in self.session_context.algorithm_weights[self.get_suffix()]:
                alg = recipe[0]
                if NO_BONUS_DIRECTIVE in recipe:
                    continue
                algorithm_ranking = sorted_scores_by_algorithm.get(alg)
                if algorithm_ranking is None or bonus_index >= len(algorithm_ranking):
                    continue
                bonus_entry = algorithm_ranking[bonus_index]
                if bonus_entry is None:
                    continue
                self._include_product(bonus_entry[1],  # the forced product (benefited by bonus)
                                      bonus_entry[0],  # its original score
                                      final_ranking,
                                      ranked_products_set,
                                      recommendations_count)

        self._include_product(voted_entry[1],  # the current product
                              voted_entry[0],  # its original score
                              final_ranking,
                              ranked_products_set,
                              recommendations_count)

    return final_ranking
def compute_similarity_for_date(product_attr_value, template_attr_value, halflife):
    """ Computes the similarity between two date attribute values, based on the
        number of whole days that separate them (regardless of which comes first).

        :param product_attr_value: The date value of the product attribute.
        :param template_attr_value: The date value of the template attribute.
        :param halflife: The halflife handed to `df.exponential`.
        :returns: The result of `df.exponential` for the distance in days.
    """
    days_apart = abs(product_attr_value - template_attr_value).days
    return df.exponential(days_apart, halflife)