def recommend_products_by_category(product_id: str, modification_type: str) -> list:
    """Recommend items from the product's category or from its sibling categories.

    The product's category is read from ``item_category_list`` (only the first
    matching row is used). Depending on ``modification_type`` the candidate
    items are taken from:

    * ``"same_category"`` - the product's own category;
    * ``"sibling_category"`` - every category sharing the product category's
      parent, plus the parent category itself.

    The product itself is removed from the candidates, which are then handed
    to ``get_top_10_by_rating``.

    :param product_id: id of the product recommendations are wanted for
    :param modification_type: ``"same_category"`` or ``"sibling_category"``
    :return: result of ``get_top_10_by_rating`` for the candidates, or an empty
        list when the product has no category, the category row is missing,
        or ``modification_type`` is not recognised
    """
    with create_connection() as connection:
        row = connection.execute(
            "SELECT categoryId FROM item_category_list WHERE itemId=(?)",
            (product_id,)).fetchone()
        if row is None:
            # The product is not assigned to any category.
            return []
        product_category_id = row[0]

        if modification_type == "same_category":
            category_ids = [product_category_id]
        elif modification_type == "sibling_category":
            parent_row = connection.execute(
                "SELECT parentCategoryId FROM category WHERE id=(?)",
                (product_category_id,)).fetchone()
            if parent_row is None:
                return []
            parent_category_id = parent_row[0]
            cursor = connection.execute(
                "SELECT id FROM category WHERE parentCategoryId=(?)",
                (parent_category_id,))
            category_ids = [record[0] for record in cursor.fetchall()]
            if parent_category_id:
                category_ids.append(parent_category_id)
        else:
            # Unknown mode: do not reuse the category-id cursor as if it held item ids.
            return []

        if not category_ids:
            return []

        # One bound parameter per category instead of formatting values into the SQL.
        placeholders = ",".join("?" * len(category_ids))
        cursor = connection.execute(
            "SELECT itemId FROM item_category_list WHERE categoryId "
            "IN ({})".format(placeholders),
            category_ids)
        candidates = [record[0] for record in cursor.fetchall()]

    # An item can be listed under several of the selected categories, so drop
    # every occurrence of the product, not only the first one.
    result_items = [item_id for item_id in candidates if item_id != product_id]
    return get_top_10_by_rating(result_items)
def _prep_data(self):
    """
    Prepare data for the recommender.

    Reads every row of the ``review`` table, pivots it into an item x user
    rating matrix (missing ratings filled with 0) and returns:
        1. the matrix as a scipy CSR sparse matrix
        2. a dict mapping each row index of that matrix to its itemId
    """
    review_columns = ['id', 'userId', 'itemId', 'rating', 'reviewTime']

    db = create_connection()
    with db:
        # load all reviews
        reader = db.cursor()
        reader.execute("SELECT * FROM review")
        rows = reader.fetchall()
        reader.close()

    ratings = pd.DataFrame(rows, columns=review_columns)

    # rows are items, columns are users
    pivoted = ratings.pivot(index='itemId', columns='userId', values='rating').fillna(0)

    # row position in the matrix -> itemId
    row_to_item = dict(enumerate(pivoted.index))

    sparse_ratings = csr_matrix(pivoted.values)

    # the dense intermediates are no longer needed
    del ratings, pivoted
    return sparse_ratings, row_to_item
def get_random_item_id():
    """Return the id of one randomly selected row of the ``item`` table."""
    db = create_connection()
    with db:
        random_row = db.execute("SELECT id FROM item ORDER BY RANDOM() LIMIT 1").fetchone()
    return random_row[0]
def index():
    """Render ``main_page.html`` with up to 120 randomly chosen items."""
    db = create_connection()
    with db:
        id_rows = db.execute("SELECT id FROM item ORDER BY RANDOM() LIMIT 120").fetchall()

    # Turn each id into the dict produced by get_item_dict.
    products = []
    for id_row in id_rows:
        products.append(get_item_dict(id_row[0]))

    return render_template("main_page.html", products=products)
def algorithm_related(product_id: str, recommendations_count: int) -> list:
    """Return ids of products recommended by the "related" similarity algorithm.

    A ``SimilarityRecommender`` is built for ``product_id`` and asked for
    ``recommendations_count`` products using ``find_similar_items_related`` and
    ``count_similarities_related``.

    :param product_id: id of the product recommendations are wanted for
    :param recommendations_count: number of recommendations requested
    :return: list with the ``id`` attribute of every recommended product
    """
    connection = create_connection()
    try:
        similarity_recommender = SimilarityRecommender(connection, product_id)
        recommended = similarity_recommender.recommend_products(
            recommendations_count, find_similar_items_related, count_similarities_related)
        # Materialise the ids while the connection is still open.
        return [product.id for product in recommended]
    finally:
        # Close the connection even when the recommender raises.
        connection.close()
def get_top_10_by_rating(items: list) -> list:
    """Pick up to 10 of the best-rated items from ``items``, with some randomness.

    The ``overallRating`` of every given item id is looked up in the ``item``
    table, the 20 highest-rated items are kept, shuffled, and the first 10 of
    the shuffled list are returned.

    :param items: item ids to choose from (duplicates are ignored)
    :return: list of at most 10 item ids; empty when ``items`` is empty
    """
    # De-duplicate while keeping order so a batched lookup cannot return an id twice.
    unique_ids = list(dict.fromkeys(items))
    if not unique_ids:
        return []

    # SQLite caps the number of bound parameters per statement (999 by default
    # in older builds), so the lookup is split into batches.
    batch_size = 500
    rated_pairs = []
    with create_connection() as connection:
        for start in range(0, len(unique_ids), batch_size):
            batch = unique_ids[start:start + batch_size]
            placeholders = ",".join("?" * len(batch))
            cursor = connection.execute(
                "SELECT id, overallRating FROM item WHERE id "
                "IN ({})".format(placeholders),
                batch)
            rated_pairs.extend((row[0], row[1]) for row in cursor.fetchall())

    # Items without a rating sort last instead of breaking the comparison.
    rated_pairs.sort(
        key=lambda pair: pair[1] if pair[1] is not None else float("-inf"),
        reverse=True)

    # adds a bit of randomness to the result
    rated_pairs = rated_pairs[:20]
    shuffle(rated_pairs)
    return [pair[0] for pair in rated_pairs[:10]]
def recommend_products_by_related(product_id: str, modification_type: str) -> list:
    """Recommend items listed as related to ``product_id``.

    With ``modification_type == "all"`` every related item is a candidate;
    any other value is used as a filter on the ``relation`` column. The
    candidates are passed to ``get_top_10_by_rating``.
    """
    if modification_type == "all":
        query = "SELECT relatedItemId FROM item_related_list WHERE itemId=(?)"
        params = (product_id,)
    else:
        query = "SELECT relatedItemId FROM item_related_list WHERE itemId=(?) AND relation=(?)"
        params = (product_id, modification_type)

    with create_connection() as db:
        rows = db.execute(query, params).fetchall()

    related_ids = []
    for row in rows:
        related_ids.append(row[0])
    return get_top_10_by_rating(related_ids)
def recommend_products_randomly(product_id: str) -> list:
    """Return up to 10 randomly chosen item ids, never including ``product_id``.

    The product is excluded directly in the query, so the result contains no
    duplicates and no retry loop is needed. Fewer than 10 ids are returned
    when the ``item`` table does not hold enough other items.

    :param product_id: id of the product that must not be recommended
    :return: list of item ids
    """
    connection = create_connection()
    with connection:
        cursor = connection.execute(
            "SELECT id FROM item WHERE id != (?) ORDER BY RANDOM() LIMIT 10;",
            (product_id,))
        return [row[0] for row in cursor.fetchall()]
def get_mean_per_item_list():
    """Return the mean rating of every item in the ``review`` table.

    The mean is computed per ``itemId`` with a group-by, which avoids building
    a dense item x user matrix and does not fail when the same user reviewed
    an item more than once. Missing ratings are skipped.

    :return: unnamed pandas Series indexed by ``itemId`` holding the mean rating
    """
    connection = create_connection()
    with connection:
        cursor = connection.cursor()
        cursor.execute("SELECT * FROM review")
        reviews = cursor.fetchall()
        cursor.close()

    columns = ['id', 'userId', 'itemId', 'rating', 'reviewTime']
    df_ratings = pd.DataFrame(reviews, columns=columns)

    means = df_ratings.groupby('itemId')['rating'].mean()
    # Keep the Series unnamed, as a row-wise mean over a pivot table would be.
    means.name = None
    return means
def categories_statistics():
    """Write two text reports about how many products each category contains.

    * ``category_products_counts.txt`` - the per-category product counts in
      ascending order, one per line.
    * ``category_products_counts_statistics.txt`` - for every distinct count
      ``x``, the number ``y`` of categories having exactly ``x`` products.

    Both files are written under ``../data/statistics/`` relative to the
    current working directory.
    """
    connection = create_connection()
    try:
        counts_cursor = connection.execute(
            "SELECT count(itemId) FROM item_category_list GROUP BY categoryId;")
        category_items_counts = [row[0] for row in counts_cursor]
    finally:
        # All rows are already in memory; release the connection.
        connection.close()

    category_items_counts_sorted = sorted(category_items_counts)
    category_items_counts_statistics_sorted = sorted(Counter(category_items_counts).items())

    with open('../data/statistics/category_products_counts.txt', 'w', encoding='utf-8') as outp:
        outp.write('Postupně pro jednotlivé kategorie počty produktů.\n')
        for count in category_items_counts_sorted:
            outp.write(str(count) + '\n')

    with open('../data/statistics/category_products_counts_statistics.txt', 'w', encoding='utf-8') as outp:
        outp.write('x\ty ... Je y kategorií, které mají právě x produktů.\n')
        for products_count, categories_count in category_items_counts_statistics_sorted:
            outp.write(str(products_count) + '\t' + str(categories_count) + '\n')
def run(self):
    """Run the whole import: prepare the tables, then parse all input files.

    Resets the module-level ``LAST_TIME_CHECKPOINT``, opens one database
    connection that is shared by all steps, and prints the total execution
    time at the end. The connection is closed even when a step raises.
    """
    global LAST_TIME_CHECKPOINT
    LAST_TIME_CHECKPOINT = datetime.now()
    start_time = datetime.now()

    # prepare DB
    self.log("Preparing DB...")
    db_con = create_connection()
    try:
        self.prepare_tables(db_con)
        self.log_billboard(["Preparation of DB is DONE!"])

        # parse input files
        ratings_by_id = self.parse_review_file(db_con)
        self.parse_meta_file(db_con, ratings_by_id)
        self.parse_related_and_categories(db_con)

        # NOTE(review): in SQLite this pragma is a per-connection setting and the
        # connection is closed right afterwards - confirm it is still needed.
        db_con.execute("PRAGMA foreign_keys = on")
    finally:
        db_con.close()

    end_time = datetime.now()
    print("DONE - Script execution took: {}".format(end_time - start_time))
def get_item_dict(item_id: str) -> dict:
    """Load one row of the ``item`` table and convert it to a dict for display.

    Text columns are HTML-unescaped; empty values and the literal string
    ``"None"`` are replaced by a placeholder or an empty string.

    :param item_id: id of the item to load
    :return: dict with the keys ``id``, ``title``, ``description``, ``price``,
        ``imageUrl``, ``salesCategory``, ``salesRank``, ``overallRating`` and
        ``percentageRating`` (the overall rating rescaled from 0-5 to 0-100)
    :raises: whatever ``abort(404)`` raises when no such item exists
    """
    connection = create_connection()
    with connection:
        # Bind the id as a parameter: it must never be formatted into the SQL text.
        cursor = connection.execute("SELECT * FROM item WHERE id=(?)", (item_id,))
        item = cursor.fetchone()

    if item is None:
        abort(404)

    def unescaped_or(value, fallback):
        # Treat empty values and the literal string "None" as missing.
        if value and value != "None":
            return html.unescape(value)
        return fallback

    overall_rating = item[7]
    # An item without a rating gets 0 % instead of failing on None * 100.
    # NOTE(review): 0 is assumed to be an acceptable "no rating" value for the template.
    percentage_rating = ((overall_rating * 100) / 5) if overall_rating is not None else 0

    return {
        "id": item[0],
        "title": unescaped_or(item[1], "<TITLE>"),
        "description": unescaped_or(item[2], ""),
        "price": item[3] if item[3] else "<PRICE>",
        "imageUrl": item[4] if item[4] else "",
        "salesCategory": unescaped_or(item[5], ""),
        "salesRank": item[6],
        "overallRating": overall_rating,
        "percentageRating": percentage_rating,
    }
def feedback(item_id: str):
    """Store the submitted algorithm evaluation and redirect to a random product.

    For every entry of ``TYPES_OF_ALGORITHMS`` the matching form field is read
    and converted with ``get_int_value``; the values are inserted, together
    with ``item_id``, as one row of ``algo_evaluation``.

    :param item_id: id of the product the feedback was given for
    :return: redirect to the page of a randomly chosen product with
        ``submitted_feedback=true`` in the query string
    """
    to_insert = [item_id]
    to_insert.extend(
        get_int_value(request.form.get(algo_type, None))
        for algo_type in TYPES_OF_ALGORITHMS)

    # Values are bound as parameters; item_id and the form fields come from the
    # request and must never be formatted into the SQL text.
    placeholders = ", ".join("?" * len(to_insert))
    query = '''
        INSERT INTO algo_evaluation(
            itemId, random, relatedAll, relatedAlsoBought, relatedAlsoViewed,
            sameCategory, siblingCategory, collaborativeFiltering,
            contentBased, contentBasedWithCategory)
        VALUES ({})
    '''.format(placeholders)

    connection = create_connection()
    with connection:
        connection.execute(query, to_insert)

    item_id = get_random_item_id()
    new_url = "/product/{}?submitted_feedback=true".format(item_id)
    return redirect(new_url)
def __init__(self):
    """Obtain a connection from ``create_connection`` and keep it on the instance."""
    self.connection = create_connection()