def get_prediction(self, session):
    """Return the dependent matrix columns for the products viewed in *session*.

    The baseline product and session dumps are re-read from disk and a fresh
    ``DependencyFinder`` is built on every call; the viewed products are
    translated to matrix row ids and looked up in the resulting dependency
    matrix.
    """
    viewed_ids = self._get_viewed_products_in_session(session)

    products = load_json("data/baseline/clean-products.json")
    viewed_rows = self._get_matrix_row_ids_from_product_ids(
        viewed_ids, products)

    sessions = load_json("data/baseline/clean-sessions.json")
    finder = DependencyFinder(products)
    finder.parse_sessions_to_find_dependencies(sessions)

    dependency_matrix = finder.dependencies.matrix
    return self._get_dependand_columns_to_given_rows(
        viewed_rows, dependency_matrix)
def _init_empty_session_representation():
    """Build a zeroed session template.

    The template holds ``'price'`` (set to ``DEFAULT_PRICE``), one zero entry
    per product id found in the clean products file (keyed by ``str(id)``),
    and one zero entry per leaf key of the first product in the vectorized
    products file.
    """
    session_template = {'price': DEFAULT_PRICE}

    products = load_json(CLEAN_PRODUCTS_PATH)
    session_template.update(
        (str(entry['product_id']), 0) for entry in products)

    vectorized = load_json(PRODUCTS_VECTORIZED_PATH)
    # Any product works as a source of leaf names; take the first one.
    sample_key = next(iter(vectorized))
    session_template.update(dict.fromkeys(vectorized[sample_key]['leafs'], 0))

    return session_template
def main(args):
    """Entry point: either search for weights or pack bags and report a score.

    With ``args.find_weights`` set, only the weight search runs. Otherwise the
    gifts are packed using the default weights, the score is printed, and the
    bags are saved when ``args.save_path`` is given.
    """
    if args.find_weights:
        find_best_weights(args.max_bag_weight, args.std_mul, args.step_mul)
        return

    gift_amounts = load_json(GIFT_AMOUNTS)
    weight_stats = load_json(DEFAULT_GIFT_WEIGHTS)
    packer = BagPacker(gift_amounts, weight_stats['avg'], weight_stats['std'])
    bags = packer.pack_bags(max_bag_weight=args.max_bag_weight,
                            std_mul=args.std_mul)

    score_eval = ScoreEvaluator()
    print(f'Score: {score_eval.calculate_score(bags, EVAL_SCORE_ITERATIONS)}')

    if args.save_path:
        KagglePreparer().save(args.save_path, bags)
def preprocess_products(self, viewed_products_ids: list):
    """Sum the vectorized representations of the viewed products and average them.

    Args:
        viewed_products_ids: ids of the viewed products; each is looked up
            in the vectorized products file by ``str(product_id)``.

    Returns:
        Whatever ``_get_average_session_from_products_sum`` produces for the
        accumulated sum (``None`` is passed through when the list is empty).

    Raises:
        KeyError: if an id is missing from the vectorized products file.
    """
    import copy

    product_repr = load_json(PRODUCTS_VECTORIZED_PATH)
    sum_session = None
    for product_id in viewed_products_ids:
        # _add_products returns its second argument as-is when the running
        # sum is still None and afterwards adds into that object in place.
        # Without a copy the accumulator aliases an entry of product_repr, so
        # a repeated id would add the partial sum to itself instead of adding
        # the product's own vector.
        product = copy.deepcopy(product_repr[str(product_id)])
        sum_session = self._add_products(sum_session, product)
    return self._get_average_session_from_products_sum(sum_session)
def _get_user_history_represented_as_single_session(
        self, clean_sessions, user_id):
    """Fold a user's qualifying sessions into one weighted session vector.

    Starts from a copy of the empty session template. For every session
    accepted by ``_is_proper_non_buy_session_for_user`` it adds, weighted by
    ``session['age'] ** 2``, the product's leaf values, a count for the
    product id, and the product price. ``'age'`` is finally set to
    ``DEFAULT_AGE``.
    """
    history = self._init_empty_session_representation().copy()
    products = load_json(PRODUCTS_VECTORIZED_PATH)

    for session in clean_sessions:
        if not self._is_proper_non_buy_session_for_user(
                session, user_id, products):
            continue

        weight = session['age'] ** 2
        product_key = str(session['product_id'])
        price = products[product_key]['price']
        leaf_values = products[product_key]['leafs']

        for leaf, value in leaf_values.items():
            history[leaf] += value * weight
        history[product_key] += weight
        history['price'] += price * weight

    history['age'] = DEFAULT_AGE
    return history
def find_best_weights(bag_capacity, std_mul, step_mul):
    """Random-step search over gift weights, keeping only improving steps.

    Each iteration after the first takes a random step whose size shrinks
    linearly with the iteration index (scaled by ``step_mul``), packs bags
    with the current weights and scores them. A step that does not beat the
    best score so far is undone. The final weights are printed and written
    to ``DEFAULT_TEMP_WEIGHTS``, then scored once more and that score printed.
    """
    gift_amounts = load_json(GIFT_AMOUNTS)
    gift_stds = load_json(DEFAULT_GIFT_WEIGHTS)['std']
    weight_search = WeightSearch()
    score_eval = ScoreEvaluator()

    def pack_with_current_weights():
        # A fresh dict of the amounts is handed to every packer
        # (presumably because BagPacker consumes it -- TODO confirm).
        packer = BagPacker(dict(gift_amounts), weight_search.weights,
                           gift_stds)
        return packer.pack_bags(max_bag_weight=bag_capacity, std_mul=std_mul)

    best_score = 0
    total_iterations = FIND_WEIGHTS_N_ITER
    for i in range(total_iterations):
        if i != 0:
            remaining_fraction = (total_iterations - i) / total_iterations
            weight_search.take_random_step(remaining_fraction * step_mul)

        candidate_score = score_eval.calculate_score(
            pack_with_current_weights())
        if candidate_score > best_score:
            best_score = candidate_score
        else:
            weight_search.step_back()
        print(f'Iteration: {i}, Score:{best_score}')

    print(weight_search.weights)
    write_json_file(DEFAULT_TEMP_WEIGHTS, weight_search.weights)

    final_bags = pack_with_current_weights()
    print(score_eval.calculate_score(final_bags, 150))
def load_sessions_data():
    """Load and return the contents of ``data/clean-sessions.json``."""
    sessions = load_json('data/clean-sessions.json')
    return sessions
def get_products_data_size():
    """Return the number of entries in ``data/clean-products.json``."""
    products = load_json('data/clean-products.json')
    return len(products)
def __init__(self, recommendation_len=DEFAULT_RECOMMENDATION_LEN):
    """Create the model predictor and load the clean products list.

    Args:
        recommendation_len: stored on the instance as
            ``recommendation_len``.
    """
    predictor_config = Config()
    self.model_predictor = NNModelPredictor(predictor_config)
    self.products = load_json(CLEAN_PRODUCTS_PATH)
    self.recommendation_len = recommendation_len
def preprocess_user(self, user_id):
    """Return the single-session representation of *user_id*'s history.

    The clean sessions file is read from disk on every call.
    """
    sessions = load_json(CLEAN_SESSION_PATH)
    return self._get_user_history_represented_as_single_session(
        sessions, user_id)
def __init__(self):
    """Load the clean products file and keep it on the instance."""
    products = load_json(CLEAN_PRODUCTS_PATH)
    self.clean_products = products
for product_id in product_ids_vectorized: avg_session[product_id] = product_ids_vectorized[product_id] leafs = avg_session.pop('leafs') for leaf in leafs: avg_session[leaf] = leafs[leaf] avg_session['age'] = DEFAULT_AGE return avg_session def _add_products(self, product1: dict, product2: dict): if product1 is None and product2 is not None: return product2 elif product1 is not None and product2 is None: return product1 elif product1 is None and product2 is None: return None else: for leaf in product1['leafs']: product1['leafs'][leaf] += product2['leafs'][leaf] for product_id in product1['vectorized_product_id']: product1['vectorized_product_id'][product_id] += product2[ 'vectorized_product_id'][product_id] return product1 if __name__ == '__main__': srp = SessionRecommendationPreprocessor() products_vectorized = srp.prepare_products_vectorized( load_json(CLEAN_PRODUCTS_PATH)) write_json_file(PRODUCTS_VECTORIZED_PATH, products_vectorized)
from utils.files_io import load_json, write_json_file

if __name__ == '__main__':
    # Drop every session whose product is not present in the clean products
    # file. The sessions file is overwritten in place.
    known_ids = {
        entry['product_id']
        for entry in load_json('data/clean-products.json')
    }
    all_sessions = load_json('data/clean-sessions.json')
    kept_sessions = [
        entry for entry in all_sessions if entry['product_id'] in known_ids
    ]
    write_json_file('data/clean-sessions.json', kept_sessions)
def __init__(self):
    """Start from the ``'avg'`` weights in the default gift weights file.

    ``prev_weights`` begins as a shallow copy of the same weights.
    """
    default_weights = load_json(DEFAULT_GIFT_WEIGHTS)
    self.weights = default_weights['avg']
    self.prev_weights = self.weights.copy()