def get_prediction(self, session):
        """Return the dependent matrix columns for the products viewed in ``session``.

        The clean products and clean sessions are re-read from the baseline
        JSON files on every call, and a fresh ``DependencyFinder`` is built
        from them before the lookup is performed.
        """
        seen_product_ids = self._get_viewed_products_in_session(session)
        products = load_json("data/baseline/clean-products.json")
        seen_rows = self._get_matrix_row_ids_from_product_ids(
            seen_product_ids, products)

        sessions = load_json("data/baseline/clean-sessions.json")
        finder = DependencyFinder(products)
        finder.parse_sessions_to_find_dependencies(sessions)

        # Look the viewed rows up in the dependency matrix built above.
        return self._get_dependand_columns_to_given_rows(
            seen_rows, finder.dependencies.matrix)
    def _init_empty_session_representation():
        """Build a zeroed session template.

        The template holds ``'price'`` (set to ``DEFAULT_PRICE``), one
        zero-valued entry per clean product id (keyed by ``str(product_id)``)
        and one zero-valued entry per leaf of the first vectorized product.
        """
        session_template = {'price': DEFAULT_PRICE}

        # One counter slot per known product, keyed by the stringified id.
        known_ids = [
            entry['product_id'] for entry in load_json(CLEAN_PRODUCTS_PATH)
        ]
        session_template.update((str(known_id), 0) for known_id in known_ids)

        # The leaf names are taken from the first vectorized product only.
        vectorized = load_json(PRODUCTS_VECTORIZED_PATH)
        reference_leafs = vectorized[next(iter(vectorized))]['leafs']
        session_template.update((leaf_name, 0) for leaf_name in reference_leafs)

        return session_template
def main(args):
    """Entry point driven by the parsed command-line ``args``.

    When ``args.find_weights`` is truthy the weight search is run.
    Otherwise bags are packed with the default gift weights, their score is
    printed, and the bags are saved if ``args.save_path`` is set.
    """
    if args.find_weights:
        find_best_weights(args.max_bag_weight, args.std_mul, args.step_mul)
        return

    amounts = load_json(GIFT_AMOUNTS)
    default_weights = load_json(DEFAULT_GIFT_WEIGHTS)
    avg_weights = default_weights['avg']
    std_weights = default_weights['std']

    packer = BagPacker(amounts, avg_weights, std_weights)
    packed_bags = packer.pack_bags(
        max_bag_weight=args.max_bag_weight, std_mul=args.std_mul)

    evaluator = ScoreEvaluator()
    score = evaluator.calculate_score(packed_bags, EVAL_SCORE_ITERATIONS)
    print(f'Score: {score}')

    if not args.save_path:
        return
    KagglePreparer().save(args.save_path, packed_bags)
 def preprocess_products(self, viewed_products_ids: list):
     """Combine the vectorized form of every viewed product into one session.

     Each id is looked up (as a string key) in the vectorized products file,
     the entries are folded together with ``_add_products``, and the sum is
     handed to ``_get_average_session_from_products_sum``.
     """
     vectorized = load_json(PRODUCTS_VECTORIZED_PATH)
     accumulated = None
     for viewed_id in viewed_products_ids:
         product_vector = vectorized[str(viewed_id)]
         accumulated = self._add_products(accumulated, product_vector)
     return self._get_average_session_from_products_sum(accumulated)
    def _get_user_history_represented_as_single_session(
            self, clean_sessions, user_id):
        """Collapse a user's qualifying sessions into one weighted session dict.

        Starts from a copy of the empty session template. Every session
        accepted by ``_is_proper_non_buy_session_for_user`` contributes its
        product's leaf values, a product counter and the product price, each
        scaled by ``session['age'] ** 2``. The ``'age'`` entry of the result
        is finally set to ``DEFAULT_AGE``.
        """
        history = self._init_empty_session_representation().copy()
        products = load_json(PRODUCTS_VECTORIZED_PATH)

        for session in clean_sessions:
            if not self._is_proper_non_buy_session_for_user(
                    session, user_id, products):
                continue

            weight = session['age']**2
            product_key = str(session['product_id'])
            price = products[product_key]['price']
            leafs = products[product_key]['leafs']

            # Accumulate the weighted leaf values of the seen product.
            for leaf_name in leafs:
                history[leaf_name] += leafs[leaf_name] * weight

            history[product_key] += weight
            history['price'] += price * weight

        history['age'] = DEFAULT_AGE
        return history
def find_best_weights(bag_capacity, std_mul, step_mul):
    """Random-step search over gift weights, keeping steps that raise the score.

    Runs ``FIND_WEIGHTS_N_ITER`` iterations. Every iteration except the first
    takes a random step whose size shrinks linearly with the iteration index
    (scaled by ``step_mul``), packs bags with the candidate weights and scores
    them. A step that does not beat the best score so far is undone. The
    final weights are printed, written to ``DEFAULT_TEMP_WEIGHTS`` and scored
    once more with 150 as the second argument to ``calculate_score``.
    """
    amounts = load_json(GIFT_AMOUNTS)
    stds = load_json(DEFAULT_GIFT_WEIGHTS)['std']
    search = WeightSearch()
    evaluator = ScoreEvaluator()
    best = 0
    n_iter = FIND_WEIGHTS_N_ITER

    for step in range(n_iter):
        if step != 0:
            remaining_fraction = (n_iter - step) / n_iter
            search.take_random_step(remaining_fraction * step_mul)

        # BagPacker receives its own copy of the amounts on every iteration.
        packer = BagPacker(dict(amounts), search.weights, stds)
        candidate_bags = packer.pack_bags(
            max_bag_weight=bag_capacity, std_mul=std_mul)
        candidate_score = evaluator.calculate_score(candidate_bags)

        if candidate_score > best:
            best = candidate_score
        else:
            search.step_back()
        print(f'Iteration: {step}, Score:{best}')

    print(search.weights)
    write_json_file(DEFAULT_TEMP_WEIGHTS, search.weights)

    final_packer = BagPacker(dict(amounts), search.weights, stds)
    final_bags = final_packer.pack_bags(
        max_bag_weight=bag_capacity, std_mul=std_mul)
    print(evaluator.calculate_score(final_bags, 150))
def load_sessions_data():
    """Return the parsed contents of ``data/clean-sessions.json``."""
    sessions = load_json('data/clean-sessions.json')
    return sessions
def get_products_data_size():
    """Return the number of entries in ``data/clean-products.json``."""
    products = load_json('data/clean-products.json')
    return len(products)
 def __init__(self, recommendation_len=DEFAULT_RECOMMENDATION_LEN):
     """Create the model predictor, load the clean products and store the length.

     ``recommendation_len`` is stored unchanged on the instance.
     """
     predictor_config = Config()
     self.model_predictor = NNModelPredictor(predictor_config)
     clean_products = load_json(CLEAN_PRODUCTS_PATH)
     self.products = clean_products
     self.recommendation_len = recommendation_len
 def preprocess_user(self, user_id):
     """Return the single-session representation of ``user_id``'s history.

     The clean sessions are loaded from ``CLEAN_SESSION_PATH`` on every call.
     """
     sessions = load_json(CLEAN_SESSION_PATH)
     return self._get_user_history_represented_as_single_session(
         sessions, user_id)
 def __init__(self):
     """Load the clean products from ``CLEAN_PRODUCTS_PATH`` onto the instance."""
     loaded_products = load_json(CLEAN_PRODUCTS_PATH)
     self.clean_products = loaded_products
        for product_id in product_ids_vectorized:
            avg_session[product_id] = product_ids_vectorized[product_id]
        leafs = avg_session.pop('leafs')
        for leaf in leafs:
            avg_session[leaf] = leafs[leaf]
        avg_session['age'] = DEFAULT_AGE

        return avg_session

    def _add_products(self, product1: dict, product2: dict):
        if product1 is None and product2 is not None:
            return product2
        elif product1 is not None and product2 is None:
            return product1
        elif product1 is None and product2 is None:
            return None
        else:
            for leaf in product1['leafs']:
                product1['leafs'][leaf] += product2['leafs'][leaf]
            for product_id in product1['vectorized_product_id']:
                product1['vectorized_product_id'][product_id] += product2[
                    'vectorized_product_id'][product_id]
            return product1


if __name__ == '__main__':
    # Vectorise the clean products and store the result as JSON.
    preprocessor = SessionRecommendationPreprocessor()
    clean_products = load_json(CLEAN_PRODUCTS_PATH)
    vectorized = preprocessor.prepare_products_vectorized(clean_products)
    write_json_file(PRODUCTS_VECTORIZED_PATH, vectorized)
from utils.files_io import load_json, write_json_file

if __name__ == '__main__':
    # Keep only the sessions whose product_id appears in the clean products,
    # then overwrite the sessions file with the filtered list.
    known_ids = {
        entry['product_id']
        for entry in load_json('data/clean-products.json')
    }
    all_sessions = load_json('data/clean-sessions.json')
    kept_sessions = []
    for record in all_sessions:
        if record['product_id'] in known_ids:
            kept_sessions.append(record)
    write_json_file('data/clean-sessions.json', kept_sessions)
 def __init__(self):
     """Start from the ``'avg'`` weights in ``DEFAULT_GIFT_WEIGHTS``.

     ``prev_weights`` begins as a shallow copy of the loaded weights.
     """
     default_weights = load_json(DEFAULT_GIFT_WEIGHTS)
     self.weights = default_weights['avg']
     self.prev_weights = self.weights.copy()