def profile_doelgroep(): profiles_orders_query = """SELECT pf.ID, d.ID, o.count FROM profiles AS pf, sessions AS s, products AS pd, orders AS o, doelgroepen AS d WHERE pf.ID = s.profilesID AND s.ID = o.sessionsID AND o.productID = pd.ID AND d.ID = pd.doelgroepenID ORDER BY o.count DESC""" profiles_ppv_query = """SELECT ppv.profileid, d.ID, ppv.count FROM profiles_previously_viewed as ppv, doelgroepen as d, products WHERE ppv.productid = products.id AND d.id = products.doelgroepenID""" orders_data = bsql.select_data(cur, profiles_orders_query) ppv_data = bsql.select_data(cur, profiles_ppv_query) all_data = orders_data + ppv_data print(len(orders_data), len(ppv_data), len(orders_data) + len(ppv_data), len(all_data)) profiles = {} c = 0 for row in all_data: c += 1 profile_id = row[0] doelgroep = row[1] doelgroep_count = row[2] if profile_id in profiles: if doelgroep in profiles[profile_id]: profiles[profile_id][doelgroep] += doelgroep_count else: profiles[profile_id][doelgroep] = doelgroep_count else: profiles[profile_id] = {doelgroep: doelgroep_count} # print(profile_id) with open('profiles_doelgroepen.csv', 'w', newline='', encoding='utf-8') as pd_file: pd_fieldnames = ["profileID", "doelgroepID"] pd_writer = csv.DictWriter(pd_file, fieldnames=pd_fieldnames) pd_writer.writeheader() for profile in profiles: pd_writer.writerow({ "profileID": profile, "doelgroepID": max(profiles[profile], key=lambda key: profiles[profile][key]) }) print(len(profiles))
def sim_products(): combis = get_combis() for combi in combis: # print(combi) simproducts_data = bsql.select_data( cur, f"SELECT * FROM soort_gelijke_producten WHERE productid = '{combi}';" ) # print(f"SELECT * FROM soort_gelijke_producten WHERE productid = '{combi}';") for row in simproducts_data: # print(combi, "lijkt op", row) for i in range(1, 6): if row[i] in combis: # print(row[i], "WEL in", combis) # print(combis[row[i]]) for p in combis[row[i]]: if p in combis[combi]: combis[combi][p] += 1 else: combis[combi][p] = 1 else: pass # print(row[i], "not in", combis) return combis with open('test.json', 'w') as tf: tf.write(json.dumps(combis, indent=4))
def combineert_goed_met(): orders_cats = {} orders = {} query_1 = ''' select sessionsid, productid from orders ''' data_1 = bsql.select_data(cur, query_1) # query_2 = ''' # select s.id, count(p.id) # from products as p, orders as o, sessions as s # where p.id = o.productid # and o.sessionsid = s.id # group by s.id # having # count(p.id) > 1; # ''' # data_2 = bsql.select_data(cur, query_2) counter = 0 list = [] for row in data_1: counter += 1 if counter % 100 == 0: print(counter) ses_id = row[0] products_in_order = row[1] list.append([ses_id, products_in_order]) # print(data_1) print('return') return data_1
def get_single_sim(product_id): query = f"""SELECT * FROM soort_gelijke_producten WHERE productid = '{product_id}';""" data = bsql.select_data(cur, query) sim = [] for row in data: for i in range(1, 6): sim.append(row[i]) return sim
def get_subcategory(product_id): query = f"""SELECT subcategory FROM categories WHERE id =( SELECT categoriesid FROM products WHERE id = '{product_id}')""" data = bsql.select_data(cur, query) sc = None for row in data: sc = row[0] return sc
def most_bought(): orders_cats = {} orders = {} query = ''' SELECT o.productid, c.category, o.count FROM orders AS o, categories AS c, products AS p WHERE o.productid = p.ID AND p.categoriesid = c.ID AND o.sessionsid in ( SELECT id FROM sessions WHERE session_end BETWEEN ( select session_end FROM sessions ORDER BY session_end DESC LIMIT 1) - INTERVAL '30 DAY' AND (select session_end FROM sessions ORDER BY session_end DESC LIMIT 1))''' data = bsql.select_data(cur, query) for row in data: cat = row[1] product_id = row[0] product_count = row[2] if cat not in orders_cats: orders_cats[cat] = {} if product_id in orders_cats[cat]: orders_cats[cat][product_id] += product_count else: orders_cats[cat][product_id] = product_count if product_id in orders: orders[product_id] += product_count else: orders[product_id] = product_count cats_popular = {} for k in orders_cats.keys(): orders_cats[k] = sorted(orders_cats[k].items(), key=lambda x: x[1], reverse=True) orders = sorted(orders.items(), key=lambda x: x[1], reverse=True) return orders, orders_cats
def doelgroepen_products(): # Welke producten zjn het populairste per doelgroep query = ''' SELECT * FROM most_popular_products''' data = bsql.select_data(cur, query) doelgroepen_prods = {} print(len(data)) for row in data: row_query = f""" SELECT doelgroepenid FROM products WHERE id = '{row[0]}'""" row_data = bsql.select_data(cur, row_query) for row_row in row_data: dgid = row_row[0] if dgid in doelgroepen_prods: if len(doelgroepen_prods[dgid]) < 5: doelgroepen_prods[dgid].append(row[0]) else: doelgroepen_prods[dgid] = list() doelgroepen_prods[dgid].append(row[0]) for i in range(0, 17): if str(i) in doelgroepen_prods: if len(doelgroepen_prods[str(i)]) < 5: need = 5 - len(doelgroepen_prods[str(i)]) need_list = data[0:need + 10] need_list = random.sample(need_list, need) for n in need_list: doelgroepen_prods[str(i)].append(n[0]) else: doelgroepen_prods[str(i)] = [] need = 5 - len(doelgroepen_prods[str(i)]) need_list = data[0:need + 10] need_list = random.sample(need_list, need) for n in need_list: doelgroepen_prods[str(i)].append(n[0]) with open('profiles_doelgroepen.csv', 'r') as pd, open('passend_bij_uw_gedrag.csv', 'w', newline='', encoding='utf-8') as pbug_file: pd_reader = csv.reader(pd) pbug_fieldnames = [ "profileid", "productid1", "productid2", "productid3", "productid4", "productid5" ] pbug_writer = csv.DictWriter(pbug_file, fieldnames=pbug_fieldnames) pbug_writer.writeheader() lc = 0 for i in pd_reader: if lc != 0: matching_products = doelgroepen_prods[i[1]] pbug_writer.writerow({ "profileid": i[0], "productid1": matching_products[0], "productid2": matching_products[1], "productid3": matching_products[2], "productid4": matching_products[3], "productid5": matching_products[4] }) else: lc += 1 print(doelgroepen_prods)
def get_all_order(): data = bsql.select_data(cur, "SELECT * FROM orders;") print("Got all the orders") return data
def sort_all_products(): cnt = 0 query = ''' SELECT p.id, c.category, c.subcategory, c.subsubcategory, d.id FROM products as p, categories as c, doelgroepen as d WHERE c.id = p.categoriesid AND d.id = p.doelgroepenid''' data = bsql.select_data(cur, query) ldata = len(data) print("Got the data") with open('soort_gelijke_producten.csv', 'w', newline='', encoding='utf-8') as sgp: sgp_fieldnames = [ "productid", "productid1", "productid2", "productid3", "productid4", "productid5" ] sgp_writer = csv.DictWriter(sgp, fieldnames=sgp_fieldnames) sgp_writer.writeheader() print("Products 2 loop trhuuuuuuuu (ongeveer): ", ldata * ldata) for row in data: # Values 2 compare 2 pid = row[0] cat = row[1] scat = row[2] sscat = row[3] did = row[4] sscd = [] ssc = [] scd = [] sc = [] cd = [] c = [] d = [] n = [] for r in data: if r[0] != pid: if r[4] == did: if r[3] == sscat: sscd.append(r[0]) elif r[2] == scat: scd.append(r[0]) elif r[1] == cat: cd.append(r[0]) else: d.append(r[0]) else: if r[3] == sscat: ssc.append(r[0]) elif r[2] == scat: sc.append(r[0]) elif r[1] == cat: c.append(r[0]) all_lists = [d, c, cd, sc, scd, ssc, sscd] for l in all_lists: if len(l) >= 5: selection = random.sample(l, 5) sgp_writer.writerow({ "productid": pid, "productid1": selection[0], "productid2": selection[1], "productid3": selection[2], "productid4": selection[3], "productid5": selection[4] }) cnt += 1 if cnt % 100 == 0: print(cnt, '/', ldata)
def get(self, profileid, count, recommendationtype): connection = bsql.get_connection("Floris09", "huwebshop") cursor = bsql.get_cursor(connection) """ This function represents the handler for GET requests coming in through the API. It currently returns a random sample of products. """ prodids = [] if recommendationtype == "popular": cat = profileid cat = cat.replace("-", " ") cat = cat.replace(" en ", " & ") print(cat) data = bsql.select_data( cursor, f"SELECT * FROM andere_kochten_ook WHERE LOWER(category) = LOWER('{cat}');" ) for row in data: print(row) prodids = [row[1], row[2], row[3], row[4]] print(prodids) if recommendationtype == "similar": prod_id = profileid data = bsql.select_data( cursor, f"SELECT * FROM soort_gelijke_producten WHERE productid = '{prod_id}';" ) for row in data: print(row) prodids = [row[1], row[2], row[3], row[4]] print(prod_id) if recommendationtype == "behaviour": data = bsql.select_data( cursor, f"SELECT * FROM passend_bij_uw_gedrag WHERE profileid = '{profileid}';" ) for row in data: print(row) prodids = [row[1], row[2], row[3], row[4]] if recommendationtype == "combination": print(recommendationtype) prod_id = profileid data = bsql.select_data( cursor, f"SELECT * FROM combineert_goed_met WHERE productid = '{prod_id}'" ) for row in data: print(row) prodids = [row[1], row[2], row[3], row[4]] if recommendationtype == "personal": data = bsql.select_data( cursor, f"SELECT * FROM public.persoonlijk_aangeboden WHERE profileid = '{profileid}';" ) for row in data: print(row) prodids = [row[1], row[2], row[3], row[4]] return prodids, 200 randcursor = database.products.aggregate([{ '$sample': { 'size': count } }]) prodids = list(map(lambda x: x['_id'], list(randcursor))) return prodids, 200