def most_frequently_bought_by_user(data_set, user_id, limit=20):
    """Log the products a user has bought most often, most frequent first.

    Each purchase entry returned by
    ``Repository.get_products_bought_by_user`` is read through its
    ``'count'`` and ``'_id'`` keys; the matching product is looked up with
    ``Repository.get_product`` and its ``'product_name'`` is logged together
    with the count at INFO level.

    Args:
        data_set: Forwarded to ``Repository(data_set=...)``.
        user_id: Forwarded to ``get_products_bought_by_user``.
        limit: Maximum number of entries to log. Defaults to 20, the value
            that used to be hard-coded. ``None`` logs every entry.

    Raises:
        ValueError: If ``limit`` is negative (a negative slice bound would
            silently drop entries from the tail instead of limiting).
    """
    if limit is not None and limit < 0:
        raise ValueError("limit must be non-negative or None")

    repository = Repository(data_set=data_set)
    user_products = repository.get_products_bought_by_user(user_id)

    # sorted() is stable even with reverse=True, so entries with equal counts
    # keep the order in which the repository returned them.
    recommended = sorted(
        user_products,
        key=lambda item: item['count'],
        reverse=True,
    )[:limit]

    for purchase in recommended:
        product = repository.get_product(purchase['_id'])
        # Entries whose product lookup returns a falsy value are skipped.
        if product:
            logger.info("{} {}".format(product['product_name'], purchase['count']))
def analyze_products_by_user(data_set, batch_size=100):
    """Build a purchase summary for every user and hand it over in batches.

    For each user id from ``Repository.get_users``, the user's purchases are
    fetched and reshaped into::

        {'user_id': <id>,
         'products': [{'product_id': <'_id'>, 'count': <'count'>}, ...]}

    The summaries of one batch are collected in a list and passed to
    ``Repository.add_user_products`` before the next batch is started.
    Progress is logged as ``"<processed>/<total>"`` at INFO level.

    Args:
        data_set: Forwarded to ``Repository(data_set=...)``.
        batch_size: Forwarded as the second argument of ``batch``; presumably
            the number of user ids per chunk (verify against ``batch``).
            Defaults to 100, the value that used to be hard-coded.
    """
    repository = Repository(data_set=data_set)
    users = repository.get_users()
    total = len(users)
    count = 0

    for user_ids in batch(users, batch_size):
        users_products = []
        for user_id in user_ids:
            purchases = repository.get_products_bought_by_user(user_id)
            summary = dict(
                user_id=user_id,
                products=[
                    dict(product_id=p['_id'], count=p['count'])
                    for p in purchases
                ],
            )
            users_products.append(summary)
            count += 1
            # NOTE(review): progress is logged once per user here; the
            # intended nesting level of this call should be confirmed.
            logger.info("{}/{}".format(count, total))
        repository.add_user_products(users_products)