def generateEdmEmailingList(connection, site_id):
    """Rebuild the site's ``edm_emailing_list`` collection from scratch.

    Collects distinct user_ids with orders within the last
    EMAILING_USER_ORDERS_MAX_DAY days (all users when no order exists yet),
    asks the recommender for each user, and stores only users for whom a
    full set of EXPECTED_RECOMMENDATION_ITEMS recommendations exists.
    """
    logger = logging.getLogger("EDMCalculations")
    # NOTE(review): c_user_orders is fetched but never used below.
    c_user_orders = getSiteDBCollection(connection, site_id, "user_orders")
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        query = {}
    else:
        query = {"order_datetime": {"$gte": latest_order_datetime
                                    - datetime.timedelta(days=EMAILING_USER_ORDERS_MAX_DAY)}}
    db = getSiteDB(connection, site_id)
    # distinct is issued via db.command so the query filter can be applied.
    result = db.command({"distinct": "user_orders",
                         "key": "user_id",
                         "query": query})
    user_ids = result["values"]
    mongo_client = MongoClient(connection)
    # Drop and re-create the collection so the list always reflects this run.
    c_edm_emailing_list = getSiteDBCollection(connection, site_id, "edm_emailing_list")
    c_edm_emailing_list.drop()
    c_edm_emailing_list = getSiteDBCollection(connection, site_id, "edm_emailing_list")
    count = 0
    t0 = time.time()
    for user_id in user_ids:
        count += 1
        if count % 100 == 0:
            # Progress log with a rough throughput figure.
            logger.info("Count: %s, %s users/sec" % (count, count / (time.time() - t0)))
        # NOTE(review): recommend_for_edm is unpacked as a 2-tuple here, but
        # other revisions of that method return a single value — confirm
        # which version of MongoClient this pairs with.
        recommendation_result, _ = mongo_client.recommend_for_edm(
            site_id, user_id, max_amount=EXPECTED_RECOMMENDATION_ITEMS)
        # Only users with a complete recommendation set get emailed.
        if len(recommendation_result) == EXPECTED_RECOMMENDATION_ITEMS:
            c_edm_emailing_list.insert({"user_id": user_id,
                                        "recommendation_result": recommendation_result})
def updateHotViewList(self, site_id, today=None):
    """Recompute the 7-day hot-view top-10 and cache it for the site.

    Sums the last seven daily view counters per item via the aggregation
    pipeline, normalizes each item's views against the top item, and
    upserts the list into ``cached_hot_view`` under HOT_INDEX_ALL_ITEMS.

    :param today: override "today" for deterministic testing; defaults to
        the current date.
    """
    if today is None:
        today = datetime.date.today()
    last_7_days_attr_names = self.getLast7DaysAttributeNames("v", today)
    c_traffic_metrics = getSiteDBCollection(self.connection, site_id, "traffic_metrics")
    res = c_traffic_metrics.aggregate(
        [{"$project": {"item_id": "$item_id",
                       "total_views": {"$add": last_7_days_attr_names}}},
         {"$sort": {"total_views": -1}},
         {"$limit": 10}])
    # pymongo 2.x aggregate() returns a dict with a "result" key.
    result = res.get("result", [])
    if result:
        # Clamp to at least 1.0 so the normalization below cannot divide by zero.
        highest_views = max(1.0, float(result[0]["total_views"]))
    else:
        highest_views = 1.0
    # Each entry becomes (item_id, views relative to the hottest item).
    result = [(record["item_id"], record["total_views"] / highest_views)
              for record in result]
    c_cached_hot_view = getSiteDBCollection(self.connection, site_id, "cached_hot_view")
    c_cached_hot_view.update({"type": HOT_INDEX_ALL_ITEMS},
                             {"type": HOT_INDEX_ALL_ITEMS, "result": result},
                             upsert=True)
def updateUserPurchasingHistory(self, site_id, user_id):
    """Refresh the user's purchasing_history from their recent PLO logs.

    Scans the newest PLO raw logs (newest first) and collects up to
    MAX_PURCHASING_HISTORY_AMOUNT distinct item_ids, then saves the
    resulting document back into ``purchasing_history``.
    """
    # TODO: time consuming, defer to offline computing
    logging.critical("TODO: move offline updateUserPurchasingHistory - user_id: %s" % user_id)
    ph_in_db = self.getPurchasingHistory(site_id, user_id)
    c_raw_logs = getSiteDBCollection(self.connection, site_id, "raw_logs")
    cursor = c_raw_logs.find({"user_id": user_id, "behavior": "PLO"}).\
        sort("created_on", -1).limit(self.MAX_PURCHASING_HISTORY_AMOUNT)
    is_items_enough = False
    purchasing_history = []
    ph_map = {}  # membership set of already-collected item_ids
    for record_PLO in cursor:
        for order_item in record_PLO["order_content"]:
            item_id = order_item["item_id"]
            if item_id not in ph_map:
                purchasing_history.append(item_id)
                ph_map[item_id] = 1
            # BUG FIX: was "> MAX", which let the history grow to
            # MAX_PURCHASING_HISTORY_AMOUNT + 1 entries before stopping.
            if len(purchasing_history) >= self.MAX_PURCHASING_HISTORY_AMOUNT:
                is_items_enough = True
                break
        if is_items_enough:
            break
    ph_in_db["purchasing_history"] = purchasing_history
    c_purchasing_history = getSiteDBCollection(self.connection, site_id,
                                               "purchasing_history")
    c_purchasing_history.save(ph_in_db)
def updateUserPurchasingHistory(self, site_id, user_id):
    """Refresh the user's purchasing_history from their recent PLO logs.

    Walks the newest PLO raw logs and gathers up to
    MAX_PURCHASING_HISTORY_AMOUNT distinct item_ids, then persists the
    updated document.
    """
    # TODO: time consuming, defer to offline computing
    logging.critical(
        "TODO: move offline updateUserPurchasingHistory - user_id: %s" % user_id)
    ph_in_db = self.getPurchasingHistory(site_id, user_id)
    c_raw_logs = getSiteDBCollection(self.connection, site_id, "raw_logs")
    cursor = c_raw_logs.find({"user_id": user_id, "behavior": "PLO"}).\
        sort("created_on", -1).limit(self.MAX_PURCHASING_HISTORY_AMOUNT)
    is_items_enough = False
    purchasing_history = []
    ph_map = {}  # set of item_ids already collected
    for record_PLO in cursor:
        for order_item in record_PLO["order_content"]:
            item_id = order_item["item_id"]
            if item_id not in ph_map:
                purchasing_history.append(item_id)
                ph_map[item_id] = 1
            # BUG FIX: the ">" comparison allowed one item beyond the cap.
            if len(purchasing_history) >= self.MAX_PURCHASING_HISTORY_AMOUNT:
                is_items_enough = True
                break
        if is_items_enough:
            break
    ph_in_db["purchasing_history"] = purchasing_history
    c_purchasing_history = getSiteDBCollection(self.connection, site_id,
                                               "purchasing_history")
    c_purchasing_history.save(ph_in_db)
def recommend_for_edm(self, site_id, user_id, max_amount=5):
    """Build EDM (email direct marketing) recommendations for a user.

    Seeds the weighted top list with the items of the user's most recent
    order, excludes everything already purchased, and converts the result
    into the standard topn output format.

    NOTE(review): raises IndexError when the user has no user_orders rows
    ([...][0] on an empty list) — confirm callers guarantee at least one.
    """
    c_user_orders = getSiteDBCollection(self.connection, site_id, "user_orders")
    c_raw_logs = getSiteDBCollection(self.connection, site_id, "raw_logs")
    latest_user_order = [
        user_order for user_order in c_user_orders.find({
            "user_id": user_id
        }).sort("order_datetime", -1).limit(1)
    ][0]
    raw_log = c_raw_logs.find_one({"_id": latest_user_order["raw_log_id"]})
    items_list = [
        order_item["item_id"] for order_item in raw_log["order_content"]
    ]
    purchasing_history = self.getPurchasingHistory(
        site_id, user_id)["purchasing_history"]
    topn = self.calc_weighted_top_list_method1(
        site_id, "PLO", items_list, extra_excludes_list=purchasing_history)
    # NOTE(review): `ref` is assigned but never used — the lambda's own
    # `ref` parameter shadows it, so the tracking tag is never applied.
    ref = "ref=edm"  # to trace source in edm
    result = self.convertTopNFormat(
        site_id,
        req_id=None,
        result_filter=SimpleRecommendationResultFilter(),
        topn=topn,
        amount=max_amount,
        include_item_info=True,
        deduplicate_item_names_required=True,
        url_converter=lambda item_link, site_id, item_id, req_id, ref: item_link)
    return result
def __init__(self, connection, site_id, last_ts, output_file_path):
    """Keep run parameters and handles to the site's log collections."""
    self.connection = connection
    self.site_id = site_id
    self.last_ts = last_ts
    self.output_file_path = output_file_path
    # tjbid -> identified user id; populated later during processing.
    self.tjbid2user = {}
    get_coll = utils.getSiteDBCollection
    self.raw_logs = get_coll(connection, site_id, "raw_logs")
    self.c_tmp_user_identified_logs_plo = get_coll(
        connection, site_id, "tmp_user_identified_logs_plo")
def __init__(self, connection, site_id, last_ts, output_file_path):
    """Store the connection, site, timestamp cutoff and output path."""
    self.connection = connection
    self.site_id = site_id
    self.last_ts = last_ts
    self.output_file_path = output_file_path
    self.tjbid2user = {}  # filled in as users are identified
    self.raw_logs = utils.getSiteDBCollection(
        connection, site_id, "raw_logs")
    self.c_tmp_user_identified_logs_plo = utils.getSiteDBCollection(
        connection, site_id, "tmp_user_identified_logs_plo")
def recommend_for_edm(self, site_id, user_id, max_amount=5):
    """Produce EDM recommendations seeded by the user's latest order.

    Excludes already-purchased items and returns the converted topn result.

    NOTE(review): [...][0] raises IndexError when the user has no orders;
    `ref` is assigned but shadowed by the lambda parameter, so unused.
    """
    c_user_orders = getSiteDBCollection(self.connection, site_id, "user_orders")
    c_raw_logs = getSiteDBCollection(self.connection, site_id, "raw_logs")
    # Most recent order for the user (assumes at least one exists).
    latest_user_order = [user_order for user_order in
                         c_user_orders.find({"user_id": user_id})
                         .sort("order_datetime", -1).limit(1)][0]
    raw_log = c_raw_logs.find_one({"_id": latest_user_order["raw_log_id"]})
    items_list = [order_item["item_id"]
                  for order_item in raw_log["order_content"]]
    purchasing_history = self.getPurchasingHistory(site_id, user_id)["purchasing_history"]
    topn = self.calc_weighted_top_list_method1(site_id, "PLO", items_list,
                                               extra_excludes_list=purchasing_history)
    ref = "ref=edm"  # to trace source in edm
    result = self.convertTopNFormat(site_id, req_id=None,
                                    result_filter=SimpleRecommendationResultFilter(),
                                    topn=topn, amount=max_amount,
                                    include_item_info=True,
                                    deduplicate_item_names_required=True,
                                    url_converter=lambda item_link, site_id, item_id, req_id, ref: item_link)
    return result
def fix_user_orders(connection, site_id):
    """(Re)build the indexes of the site's ``user_orders`` collection.

    Drops all existing indexes, then creates a descending background
    index on ``order_datetime``.
    """
    c_user_orders = getSiteDBCollection(connection, site_id, "user_orders")
    c_user_orders.drop_indexes()
    # BUG FIX: the direction must be part of a (key, direction) pair;
    # a bare -1 in the second positional slot is interpreted by pymongo
    # as the cache_for/ttl argument, not a sort direction.
    c_user_orders.ensure_index([("order_datetime", -1)],
                               background=True, unique=False)
def getEmailingUsers(connection, site_id, page_num, page_size):
    """Return one page of users with recent orders plus pagination metadata.

    "Recent" means within EMAILING_USER_ORDERS_MAX_DAY days of the newest
    recorded order (all users when no order exists yet).  ``page_num`` is
    1-based.
    """
    # NOTE(review): c_user_orders is fetched but never used below.
    c_user_orders = getSiteDBCollection(connection, site_id, "user_orders")
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        query = {}
    else:
        query = {
            "order_datetime": {"$gte": latest_order_datetime
                               - datetime.timedelta(days=EMAILING_USER_ORDERS_MAX_DAY)}
        }
    db = getSiteDB(connection, site_id)
    # distinct via db.command so the time-window filter can be applied.
    result = db.command({"distinct": "user_orders", "key": "user_id",
                         "query": query})
    user_ids = result["values"]
    selected_user_ids = user_ids[(page_num - 1) * page_size : page_num * page_size]
    # Python 2 integer division: floor page count, corrected for remainder.
    max_page_num = len(user_ids) / page_size
    if len(user_ids) % page_size > 0:
        max_page_num += 1
    # Sliding window of up to 10 page numbers around the current page.
    page_num_left = max(page_num - 4, 1)
    page_num_right = min(max_page_num, page_num + (9 - (page_num - page_num_left)))
    models = [{"user_id": user_id} for user_id in selected_user_ids]
    return {
        "models": models,
        "page": page_num,
        "page_size": page_size,
        "total": len(user_ids),
        "prev_page_num": max(1, page_num - 1),
        "page_nums": range(page_num_left, page_num_right + 1),
        "next_page_num": min(max_page_num, page_num + 1),
        "max_page_num": max_page_num,
        "curr_left_reached": page_num == 1,
        "curr_right_reached": page_num >= max_page_num,
    }
def updateRecord(connection, site_id, item_view_times_map, last_item_id1,
                 last_rows):
    """Upsert the viewed-ultimately-buys record for one source item.

    :param item_view_times_map: item_id -> total view count
    :param last_item_id1: the source item whose record is being written
    :param last_rows: list of (item_id2, count) pairs, sorted here by
        descending count before being written out.
    """
    last_rows.sort(lambda a, b: sign(b[1] - a[1]))
    item1_total_views = item_view_times_map[last_item_id1]
    content_dict = {
        "item_id": last_item_id1,
        "total_views": item1_total_views,
        "viewedUltimatelyBuys": []
    }
    for row in last_rows:
        item_id2, count = row
        content_dict["viewedUltimatelyBuys"].append({
            "item_id": item_id2,
            "count": count,
            # BUG FIX: count and item1_total_views are both ints, so the
            # old "count / item1_total_views" truncated to 0 under Python 2
            # integer division; force float division.
            "percentage": float(count) / item1_total_views
        })
    c_viewed_ultimately_buys = getSiteDBCollection(connection, site_id,
                                                   "viewed_ultimately_buys")
    c_viewed_ultimately_buys.update({"item_id": last_item_id1},
                                    content_dict, upsert=True)
def ajax_item(request, api_key, item_id): user_name = request.session.get("user_name", None) #api_key = request.GET.get("api_key", None) _checkUserAccessSite(user_name, api_key) connection = mongo_client.connection c_sites = connection["tjb-db"]["sites"] site = c_sites.find_one({"api_key": api_key}) c_items = getSiteDBCollection(connection, site["site_id"], "items") item = c_items.find_one({"item_id": item_id}) black_list = itemInfoListFromItemIdList(site['site_id'], mongo_client.get_black_list(site['site_id'], item_id)) for black_item in black_list: black_item['is_black'] = True item_categories = ",".join([category["id"] for category in item["categories"]]) data = { 'item_id': item['item_id'], 'item_name': item['item_name'], 'item_link': item['item_link'], 'item_categories': item_categories, 'market_price': item.get('market_price', ''), 'price': item.get('price', ''), 'image_link': item.get('image_link', ''), 'available': item['available'], 'rec_lists':{ "also_viewed": _getTopnByAPI(site, "AlsoViewed", item_id, 15), "also_bought": _getTopnByAPI(site, "AlsoBought", item_id, 15), "bought_together": _getTopnByAPI(site, "BoughtTogether", item_id, 15), "ultimately_bought": _getUltimatelyBought(site, item_id, 15), "black_list": black_list } } return HttpResponse(json.dumps(data))
def getEmailingUsers(connection, site_id, page_num, page_size):
    """Return a 1-based page of recently-ordering users plus paging info.

    Users qualify when they ordered within EMAILING_USER_ORDERS_MAX_DAY
    days of the latest recorded order; with no orders yet, all users match.
    """
    # NOTE(review): c_user_orders is fetched but never used below.
    c_user_orders = getSiteDBCollection(connection, site_id, "user_orders")
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        query = {}
    else:
        query = {"order_datetime": {"$gte": latest_order_datetime
                                    - datetime.timedelta(days=EMAILING_USER_ORDERS_MAX_DAY)}}
    db = getSiteDB(connection, site_id)
    result = db.command({"distinct": "user_orders", "key": "user_id",
                         "query": query})
    user_ids = result["values"]
    selected_user_ids = user_ids[(page_num - 1) * page_size:page_num * page_size]
    # Python 2 integer division intentionally floors the page count.
    max_page_num = len(user_ids) / page_size
    if len(user_ids) % page_size > 0:
        max_page_num += 1
    # Window of up to 10 page links centered near the current page.
    page_num_left = max(page_num - 4, 1)
    page_num_right = min(max_page_num, page_num + (9 - (page_num - page_num_left)))
    models = [{"user_id": user_id} for user_id in selected_user_ids]
    return {"models": models,
            "page": page_num,
            "page_size": page_size,
            "total": len(user_ids),
            "prev_page_num": max(1, page_num - 1),
            "page_nums": range(page_num_left, page_num_right + 1),
            "next_page_num": min(max_page_num, page_num + 1),
            "max_page_num": max_page_num,
            "curr_left_reached": page_num == 1,
            "curr_right_reached": page_num >= max_page_num}
def ajax_item(request, api_key, item_id): user_name = request.session.get("user_name", None) # api_key = request.GET.get("api_key", None) _checkUserAccessSite(user_name, api_key) connection = mongo_client.connection c_sites = connection["tjb-db"]["sites"] site = c_sites.find_one({"api_key": api_key}) c_items = getSiteDBCollection(connection, site["site_id"], "items") item = c_items.find_one({"item_id": item_id}) black_list = itemInfoListFromItemIdList(site["site_id"], mongo_client.get_black_list(site["site_id"], item_id)) for black_item in black_list: black_item["is_black"] = True data = { "item_id": item["item_id"], "item_name": item["item_name"], "item_link": item["item_link"], "item_categories": ",".join(item["categories"]), "market_price": item.get("market_price", ""), "price": item.get("price", ""), "image_link": item["image_link"], "available": item["available"], "rec_lists": { "also_viewed": _getTopnByAPI(site, "getAlsoViewed", item_id, 15), "also_bought": _getTopnByAPI(site, "getAlsoBought", item_id, 15), "bought_together": _getTopnByAPI(site, "getBoughtTogether", item_id, 15), "ultimately_bought": _getUltimatelyBought(site, item_id, 15), "black_list": black_list, }, } return HttpResponse(json.dumps(data))
def getHotViewList(self, site_id):
    """Return the cached site-wide hot-view list, or [] when not cached."""
    cache = getSiteDBCollection(self.connection, site_id, "cached_hot_view")
    entry = cache.find_one({"type": HOT_INDEX_ALL_ITEMS})
    return entry["result"] if entry else []
def insertUserOrderFromRawLog(connection, site_id, raw_log):
    """Derive a user_orders row from a PLO raw log and insert it."""
    total = sum(float(row["price"]) * int(row["amount"])
                for row in raw_log["order_content"])
    user_orders = getSiteDBCollection(connection, site_id, "user_orders")
    user_orders.insert({"user_id": raw_log["filled_user_id"],
                        "order_datetime": raw_log["created_on"],
                        "raw_log_id": raw_log["_id"],
                        "amount": total})
def getEdmEmailingUsers(connection, site_id, page_num, page_size):
    """Return one page of users from the precomputed edm_emailing_list.

    Delegates pagination to _getModelsByPages; each model exposes only the
    user_id.
    """
    c_edm_emailing_list = getSiteDBCollection(connection, site_id,
                                              "edm_emailing_list")
    cursor = c_edm_emailing_list.find()

    # IDIOM FIX (PEP 8 E731): use a named def instead of assigning a lambda.
    def record_processor(record):
        return {"user_id": record["user_id"]}

    return _getModelsByPages(connection, site_id, page_num, page_size,
                             cursor, record_processor)
def get_black_list(self, site_id, item_id):
    """Return the recommendation black list for item_id ([] when unset)."""
    coll = getSiteDBCollection(self.connection, site_id, "rec_black_lists")
    entry = coll.find_one({"item_id": item_id})
    return [] if entry is None else entry["black_list"]
def ajax_item(request, api_key, item_id): user_name = request.session.get("user_name", None) #api_key = request.GET.get("api_key", None) _checkUserAccessSite(user_name, api_key) connection = mongo_client.connection c_sites = connection["tjb-db"]["sites"] site = c_sites.find_one({"api_key": api_key}) c_items = getSiteDBCollection(connection, site["site_id"], "items") item = c_items.find_one({"item_id": item_id}) black_list = itemInfoListFromItemIdList( site['site_id'], mongo_client.get_black_list(site['site_id'], item_id)) for black_item in black_list: black_item['is_black'] = True item_categories = ",".join( [category["id"] for category in item["categories"]]) data = { 'item_id': item['item_id'], 'item_name': item['item_name'], 'item_link': item['item_link'], 'item_categories': item_categories, 'market_price': item.get('market_price', ''), 'price': item.get('price', ''), 'image_link': item.get('image_link', ''), 'available': item['available'], 'rec_lists': { "also_viewed": _getTopnByAPI(site, "AlsoViewed", item_id, 15), "also_bought": _getTopnByAPI(site, "AlsoBought", item_id, 15), "bought_together": _getTopnByAPI(site, "BoughtTogether", item_id, 15), "ultimately_bought": _getUltimatelyBought(site, item_id, 15), "black_list": black_list } } return HttpResponse(json.dumps(data))
def removeItem(self, site_id, item_id):
    """Soft-delete an item: mark it unavailable and stamp removed_on."""
    items = getSiteDBCollection(self.connection, site_id, "items")
    doc = items.find_one({"item_id": item_id})
    if doc is None:
        return
    doc["available"] = False
    doc["removed_on"] = datetime.datetime.now()
    items.save(doc)
def updateSearchTermsCache(self, site_id, cache_entry):
    """Upsert cache_entry keyed by its pipe-joined terms_key."""
    coll = getSiteDBCollection(self.connection, site_id, "search_terms_cache")
    cache_entry["terms_key"] = "|".join(cache_entry["terms"])
    coll.update({"terms_key": cache_entry["terms_key"]}, cache_entry,
                upsert=True)
def fetchSearchTermsCacheEntry(self, site_id, terms):
    """Look up the cache entry for a term set (order-insensitive).

    Returns (terms_key, entry-or-None); the input list is not mutated.
    """
    coll = getSiteDBCollection(self.connection, site_id, "search_terms_cache")
    terms_key = "|".join(sorted(terms))
    return terms_key, coll.find_one({"terms_key": terms_key})
def calculateKeywordHotViewList(self, site_id, today=None):
    """Return the hot search keywords over the last seven days.

    Aggregates per-keyword counters, keeps keywords seen at least
    MINIMAL_KEYWORD_HOT_VIEW_COUNT times, and exposes the list under the
    "null" key only when it is at least MINIMAL_KEYWORD_HOT_VIEW_LENGTH
    long (otherwise an empty list, so callers never see a thin result).
    """
    if today is None:
        today = datetime.date.today()
    last_7_days_attr_names = self.getLast7DaysAttributeNames("k", today)
    c_keyword_metrics = getSiteDBCollection(self.connection, site_id,
                                            "keyword_metrics")
    res = c_keyword_metrics.aggregate(
        [{"$project": {"keyword": 1,
                       "count": {"$add": last_7_days_attr_names}}},
         {"$group": {"_id": "$keyword", "count": {"$sum": "$count"}}},
         {"$match": {"count": {"$gt": 0}}},
         {"$sort": {"count": -1}},
         {"$limit": 50}])
    # pymongo 2.x aggregate() returns a dict with a "result" key.
    result = res.get("result", [])
    # FIX: replaced a leftover debug `print` with a debug-level log entry.
    logging.debug("calculateKeywordHotViewList result: %s", result)
    topn = [record["_id"] for record in result
            if record["count"] >= settings.MINIMAL_KEYWORD_HOT_VIEW_COUNT]
    if len(topn) >= settings.MINIMAL_KEYWORD_HOT_VIEW_LENGTH:
        return {"null": topn}
    else:
        return {"null": []}
def getHotViewList(self, site_id, hot_index_type, category_id=None,
                   brand=None):
    """Return the cached hot-view list for the given key tuple, or []."""
    cache = getSiteDBCollection(self.connection, site_id, "cached_hot_view")
    entry = cache.find_one({"hot_index_type": hot_index_type,
                            "category_id": category_id,
                            "brand": brand})
    return entry["result"] if entry else []
def getBrowsingHistory(self, site_id, ptm_id):
    """Return the visitor's browsing history, or [] for an unknown ptm_id."""
    visitors = getSiteDBCollection(self.connection, site_id, "visitors")
    doc = visitors.find_one({"ptm_id": ptm_id})
    return doc["browsing_history"] if doc else []
def updateTrafficMetricsFromLog(self, site_id, raw_log):
    """Increment per-item view ("v") or buy ("b") counters from one raw log.

    Counter keys are nested year/month/day/hour paths such as
    "v.2013.5.7.16.v", so later aggregation can sum at any granularity.
    Unrecognized behaviors are ignored.
    """
    c_traffic_metrics = getSiteDBCollection(self.connection, site_id, "traffic_metrics")
    behavior = raw_log.get("behavior", None)
    created_on = raw_log["created_on"]
    year, month, day, hour = created_on.year, created_on.month, created_on.day, created_on.hour
    if behavior == "V":
        # A single item view bumps the four view counters at once.
        item_id = raw_log["item_id"]
        c_traffic_metrics.update({"item_id": item_id},
                                 {"$inc": {
                                     "v.%d.v" % year: 1,
                                     "v.%d.%d.v" % (year, month): 1,
                                     "v.%d.%d.%d.v" % (year, month, day): 1,
                                     "v.%d.%d.%d.%d.v" % (year, month, day, hour): 1,
                                 }},
                                 upsert=True)
    elif behavior == "PLO":
        # An order bumps the buy counters for every line item it contains.
        for order_row in raw_log["order_content"]:
            item_id = order_row["item_id"]
            c_traffic_metrics.update({"item_id": item_id},
                                     {"$inc": {
                                         ("b.%d.b" % year): 1,
                                         ("b.%d.%d.b" % (year, month)): 1,
                                         ("b.%d.%d.%d.b" % (year, month, day)): 1,
                                         "b.%d.%d.%d.%d.b" % (year, month, day, hour): 1,
                                     }},
                                     upsert=True)
def getSiteInfos():
    """Collect per-site status rows for the admin overview page.

    For every site: checking-daemon info, the newest calculation record
    (status, timings, estimated next run), any pending manual-calculation
    request, and item counts.
    """
    connection = mongo_client.connection
    sites = mongo_client.loadSites()
    now = datetime.datetime.now()
    result = []
    for site in sites:
        sci = {"site_id": site["site_id"],
               "site_name": site["site_name"],
               "disabledFlows": site.get("disabledFlows", [])}
        fillSiteCheckingDaemonInfo(connection, sci)
        calculation_records = getSiteDBCollection(connection, site["site_id"],
                                                  "calculation_records")
        records = [row for row in calculation_records.find()
                   .sort("begin_datetime", -1).limit(1)]
        if records == []:
            sci["status"] = "NEVER_CALC"
        else:
            record = records[0]
            sci["last_calculation_id"] = record["calculation_id"]
            # IDIOM FIX: dict.has_key() is deprecated (removed in Py3);
            # use the "in" operator.
            if "end_datetime" in record:
                if record["is_successful"]:
                    sci["status"] = "SUCCESSFUL"
                else:
                    sci["status"] = "FAILED"
                sci["since_last"] = convertTimedeltaAsDaysHoursMinutesSeconds(
                    now - record["end_datetime"])
                sci["time_spent"] = convertTimedeltaAsDaysHoursMinutesSeconds(
                    record["end_datetime"] - record["begin_datetime"])
                # Next run is due calc_interval after the last run finished;
                # clamp to zero when it is already overdue.
                est_next_run = max(record["end_datetime"]
                                   + datetime.timedelta(seconds=site["calc_interval"])
                                   - now,
                                   datetime.timedelta(seconds=0))
                if est_next_run == datetime.timedelta(seconds=0):
                    sci["est_next_run"] = "as soon as possible"
                else:
                    sci["est_next_run"] = convertTimedeltaAsDaysHoursMinutesSeconds(est_next_run)
            else:
                # No end_datetime yet means the calculation is in progress.
                sci["status"] = "RUNNING"
                sci["time_spent"] = convertTimedeltaAsDaysHoursMinutesSeconds(
                    now - record["begin_datetime"])
        manual_calculation_list = connection["tjb-db"]["manual_calculation_list"]
        manual_calculation_request = manual_calculation_list.find_one(
            {"site_id": site["site_id"]})
        if manual_calculation_request is not None:
            request_datetime = manual_calculation_request["request_datetime"]
            sci["request_waiting_time"] = convertTimedeltaAsDaysHoursMinutesSeconds(
                now - request_datetime)
        c_items = getSiteDBCollection(connection, site["site_id"], "items")
        sci["all_items_count"] = c_items.find().count()
        sci["available_items_count"] = c_items.find({"available": True}).count()
        result.append(sci)
    return result
def getSimilaritiesForItems(self, site_id, similarity_type, item_ids):
    """Fetch similarity rows for item_ids with the black list applied."""
    coll = getSiteDBCollection(self.connection, site_id,
                               "item_similarities_%s" % similarity_type)
    rows = []
    for doc in coll.find({"item_id": {"$in": item_ids}}):
        doc["mostSimilarItems"] = self.apply_black_list2topn(
            site_id, doc["item_id"], doc["mostSimilarItems"])
        rows.append(doc)
    return rows
def fix_purchasing_history(connection, site_id):
    """(Re)build the unique user_id index on purchasing_history."""
    c_purchasing_history = getSiteDBCollection(connection, site_id,
                                               "purchasing_history")
    c_purchasing_history.drop_indexes()
    # BUG FIX: the direction was passed as the string "1" in the second
    # positional slot, which pymongo reads as cache_for, not a direction.
    # Use the (key, direction) pair form.
    c_purchasing_history.ensure_index([("user_id", 1)],
                                      background=True, unique=True)
def fix_viewed_ultimately_buys(connection, site_id):
    """(Re)build the unique item_id index on viewed_ultimately_buys."""
    c_viewed_ultimately_buys = getSiteDBCollection(connection, site_id,
                                                   "viewed_ultimately_buys")
    c_viewed_ultimately_buys.drop_indexes()
    # BUG FIX: direction belongs in a (key, direction) pair; a bare second
    # positional argument is pymongo's cache_for parameter.
    c_viewed_ultimately_buys.ensure_index([("item_id", 1)],
                                          background=True, unique=True)
def itemInfoListFromItemIdList(site_id, item_id_list):
    """Return item info dicts (id, name, link, image) for the given ids.

    The MongoDB _id is stripped so the result is JSON-serializable.
    """
    c_items = getSiteDBCollection(mongo_client.connection, site_id, "items")
    # BUG FIX: the projection used "image_link": '' — an empty string is
    # falsy, so the server treats it as an exclusion and rejects the mixed
    # inclusion/exclusion projection. Use 1 to include the field.
    item_info_list = [item for item in
                      c_items.find({"item_id": {"$in": item_id_list}},
                                   {"item_id": 1,
                                    "item_name": 1,
                                    "item_link": 1,
                                    "image_link": 1})]
    for item_info in item_info_list:
        del item_info["_id"]
    return item_info_list
def fix_items(connection, site_id):
    """(Re)build every index on the items collection."""
    c_items = getSiteDBCollection(connection, site_id, "items")
    c_items.drop_indexes()
    # BUG FIX: directions must be given as (key, direction) pairs; a bare
    # second positional argument is read by pymongo as cache_for.
    c_items.ensure_index([("item_name", 1)], background=True, unique=False)
    c_items.ensure_index([("item_id", 1)], background=True, unique=True)  # , drop_dups=True)
    # NOTE: MongoDB scans single-field indexes in both directions, so the
    # ascending/descending duplicates below are likely redundant; kept to
    # preserve the original intent.
    c_items.ensure_index([("created_on", -1)], background=True, unique=False)
    c_items.ensure_index([("created_on", 1)], background=True, unique=False)
    c_items.ensure_index([("removed_on", -1)], background=True, unique=False)
    c_items.ensure_index([("removed_on", 1)], background=True, unique=False)
def fix_item_similarities_collections(connection, site_id):
    """Rebuild the unique item_id index on every item_similarities_* collection."""
    for similarity_type in ("V", "PLO", "BuyTogether"):
        c_item_similarities = getSiteDBCollection(
            connection, site_id, "item_similarities_%s" % similarity_type)
        c_item_similarities.drop_indexes()
        # BUG FIX: direction must be part of a (key, direction) pair; a
        # bare 1 in the second positional slot is pymongo's cache_for.
        c_item_similarities.ensure_index([("item_id", 1)],
                                         background=True, unique=True)
def getSimilaritiesForItem(self, site_id, similarity_type, item_id):
    """Return the black-list-filtered topn for item_id ([] when unknown)."""
    coll = getSiteDBCollection(self.connection, site_id,
                               "item_similarities_%s" % similarity_type)
    doc = coll.find_one({"item_id": item_id},
                        read_preference=ReadPreference.SECONDARY_PREFERRED)
    topn = doc["mostSimilarItems"] if doc is not None else []
    return self.apply_black_list2topn(site_id, item_id, topn)
def toggle_black_list(self, site_id, item_id1, item_id2, is_on):
    """Add (is_on=True) or remove item_id2 in item_id1's black list."""
    lists = getSiteDBCollection(self.connection, site_id, "rec_black_lists")
    # Ensure the per-item document exists before the atomic array update.
    if lists.find_one({"item_id": item_id1}) is None:
        lists.insert({"item_id": item_id1, "black_list": []})
    op = "$addToSet" if is_on else "$pull"
    lists.update({"item_id": item_id1}, {op: {"black_list": item_id2}})
def updateCategory(self, site_id, category):
    """Upsert a category document, preserving any existing _id."""
    cats = getSiteDBCollection(self.connection, site_id, "categories")
    existing = cats.find_one({"category_id": category["category_id"]})
    doc = {} if existing is None else {"_id": existing["_id"]}
    doc.update(category)
    cats.save(doc)
def createCalculationRecord(site_id):
    """Insert a fresh calculation_records row and return its calculation_id.

    The record starts with only begin_datetime and an empty flows map;
    flow results and end_datetime are filled in later by the calculation.
    """
    calculation_id = str(uuid.uuid4())
    record = {
        "calculation_id": calculation_id,
        "begin_datetime": datetime.datetime.now(),
        "flows": {}}
    # NOTE(review): `connection` is not a parameter here — presumably a
    # module-level global; verify it is initialized before this is called.
    calculation_records = getSiteDBCollection(
        connection, site_id, "calculation_records")
    calculation_records.save(record)
    return calculation_id
def getPurchasingHistory(self, site_id, user_id):
    """Return the user's purchasing-history doc; an empty one when absent."""
    coll = getSiteDBCollection(self.connection, site_id, "purchasing_history")
    doc = coll.find_one({"user_id": user_id},
                        read_preference=ReadPreference.SECONDARY_PREFERRED)
    if doc is None:
        doc = {"user_id": user_id, "purchasing_history": []}
    return doc
def getSimilaritiesForItems(self, site_id, similarity_type, item_ids):
    """Fetch similarity rows for the given items, applying the black list."""
    collection_name = "item_similarities_%s" % similarity_type
    c_sims = getSiteDBCollection(self.connection, site_id, collection_name)

    def _filtered(row):
        # Filter each row's similar-items list through the black list.
        row["mostSimilarItems"] = self.apply_black_list2topn(
            site_id, row["item_id"], row["mostSimilarItems"])
        return row

    return [_filtered(row)
            for row in c_sims.find({"item_id": {"$in": item_ids}})]
def doUpdateUserOrdersCollection(connection, site_id):
    """Incrementally sync user_orders from new PLO raw logs.

    Adds orders newer than the latest known order_datetime, then replays
    logs whose anonymous user was later identified.
    """
    c_raw_logs = getSiteDBCollection(connection, site_id, "raw_logs")
    c_user_orders = getSiteDBCollection(connection, site_id, "user_orders")
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    query_condition = {"behavior": "PLO"}
    if latest_order_datetime is not None:
        query_condition["created_on"] = {"$gt": latest_order_datetime}
    # scan for and add new user_orders
    # NOTE: sort "created_on" to ensure scanning from oldest to newest
    # (otherwise we will miss some logs next time if this process fails on
    # the half way)
    for raw_log in c_raw_logs.find(query_condition).sort("created_on", 1):
        # IDIOM FIX: dict.has_key() is deprecated; use "in".
        if "filled_user_id" in raw_log \
                and not raw_log["filled_user_id"].startswith("ANO_"):
            insertUserOrderFromRawLog(connection, site_id, raw_log)
    # process those raw_logs which was previously filled with an "ANO_"
    # user id and now got identified as a registered user.
    c_tmp_user_identified_logs_plo = getSiteDBCollection(
        connection, site_id, "tmp_user_identified_logs_plo")
    for tmp_user_identified_log_plo in c_tmp_user_identified_logs_plo.find():
        raw_log = c_raw_logs.find_one(
            {"_id": tmp_user_identified_log_plo["log_id"]})
        insertUserOrderFromRawLog(connection, site_id, raw_log)
        # Remove the marker only after the order has been inserted.
        c_tmp_user_identified_logs_plo.remove(
            {"_id": tmp_user_identified_log_plo["_id"]})
def getSimilaritiesForViewedUltimatelyBuy(self, site_id, item_id):
    """Return (item_id, percentage) viewed-ultimately-buy pairs, filtered."""
    coll = getSiteDBCollection(self.connection, site_id,
                               "viewed_ultimately_buys")
    doc = coll.find_one({"item_id": item_id},
                        read_preference=ReadPreference.SECONDARY_PREFERRED)
    rows = doc["viewedUltimatelyBuys"] if doc is not None else []
    pairs = [(row["item_id"], row["percentage"]) for row in rows]
    return self.apply_black_list2topn(site_id, item_id, pairs)
def upload_statistics(site_id, connection, client, data):
    """Merge one day's statistics into the site's statistics collection.

    ``data`` must carry a "date_str" key identifying the row; remaining
    keys are merged into the existing row (created if missing).
    """
    c_statistics = getSiteDBCollection(connection, site_id, "statistics")
    date_str = data["date_str"]
    del data["date_str"]
    row_in_db = c_statistics.find_one({"date": date_str})
    if row_in_db is None:
        row_in_db = {"date": date_str}
    # BUG FIX: the old loop called row_in_db.update(data) once per key in
    # data — redundant repeated work; a single update merges everything.
    row_in_db.update(data)
    c_statistics.save(row_in_db)
def fix_items(connection, site_id):
    """Drop and re-create all indexes on the items collection."""
    c_items = getSiteDBCollection(connection, site_id, "items")
    c_items.drop_indexes()
    # BUG FIX: index directions must be (key, direction) pairs; bare
    # second positional arguments are interpreted as pymongo's cache_for.
    c_items.ensure_index([("item_name", 1)], background=True, unique=False)
    c_items.ensure_index([("item_id", 1)], background=True, unique=True)  # , drop_dups=True)
    # NOTE: single-field indexes are bidirectional in MongoDB, so these
    # paired ascending/descending indexes are likely redundant; preserved
    # to match the original intent.
    c_items.ensure_index([("created_on", -1)], background=True, unique=False)
    c_items.ensure_index([("created_on", 1)], background=True, unique=False)
    c_items.ensure_index([("removed_on", -1)], background=True, unique=False)
    c_items.ensure_index([("removed_on", 1)], background=True, unique=False)
def updateProperty(self, site_id, property):
    """Upsert a property doc matched on (id, type), keeping an existing _id."""
    props = getSiteDBCollection(self.connection, site_id, "properties")
    existing = props.find_one({"id": property["id"],
                               "type": property["type"]})
    doc = {"_id": existing["_id"]} if existing is not None else {}
    doc.update(property)
    props.save(doc)
def getProperty(self, site_id, property_type, property_id):
    """Fetch a property doc by (type, id); returns None when absent."""
    props = getSiteDBCollection(self.connection, site_id, "properties")
    query = {"type": property_type, "id": property_id}
    return props.find_one(query,
                          read_preference=ReadPreference.SECONDARY_PREFERRED)
def dashboard(request):
    """Render the dashboard listing the user's sites with item counts."""
    user_name = request.session["user_name"]
    sites = _getUserSites(user_name)
    connection = mongo_client.connection
    for site in sites:
        # Annotate each site with its count of currently-available items.
        c_items = getSiteDBCollection(connection, site['site_id'], "items")
        site['items_count'] = c_items.find({"available": True}).count()
    return render_to_response("dashboard/index.html",
                              {"page_name": "控制台首页",
                               "sites": sites,
                               "user_name": user_name,
                               },
                              context_instance=RequestContext(request))
def itemInfoListFromItemIdList(site_id, item_id_list):
    """Return JSON-safe item info (id, name, link, image) for the given ids."""
    c_items = getSiteDBCollection(mongo_client.connection, site_id, "items")
    # BUG FIX: "image_link": "" is falsy, so the server treats it as an
    # exclusion and rejects the mixed inclusion/exclusion projection.
    # Include the field with 1 instead.
    item_info_list = [
        item for item in c_items.find(
            {"item_id": {"$in": item_id_list}},
            {"item_id": 1, "item_name": 1, "item_link": 1, "image_link": 1}
        )
    ]
    # Strip the non-serializable ObjectId before handing the list to views.
    for item_info in item_info_list:
        del item_info["_id"]
    return item_info_list
def updateCategory(self, site_id, category):
    """Insert or update the category identified by category_id."""
    c_categories = getSiteDBCollection(self.connection, site_id, "categories")
    found = c_categories.find_one({"category_id": category["category_id"]})
    merged = dict(category)
    if found is not None:
        # Re-use the stored _id so save() updates instead of inserting.
        merged["_id"] = found["_id"]
    c_categories.save(merged)
def createCalculationRecord(site_id):
    """Insert a new calculation_records row and return its calculation_id.

    Only begin_datetime and an empty flows map are set here; the running
    calculation fills in the rest later.
    """
    calculation_id = str(uuid.uuid4())
    record = {
        "calculation_id": calculation_id,
        "begin_datetime": datetime.datetime.now(),
        "flows": {}
    }
    # NOTE(review): `connection` is read from module scope, not passed in —
    # confirm it is initialized before this function is used.
    calculation_records = getSiteDBCollection(connection, site_id,
                                              "calculation_records")
    calculation_records.save(record)
    return calculation_id
def insertUserOrderFromRawLog(connection, site_id, raw_log):
    """Insert a user_orders row computed from one PLO raw log."""
    order_total = sum(float(line["price"]) * int(line["amount"])
                      for line in raw_log["order_content"])
    c_user_orders = getSiteDBCollection(connection, site_id, "user_orders")
    c_user_orders.insert({
        "user_id": raw_log["filled_user_id"],
        "order_datetime": raw_log["created_on"],
        "raw_log_id": raw_log["_id"],
        "amount": order_total
    })
def fix_site_checking_daemon_logs(connection, site_id):
    """(Re)build the indexes of site_checking_daemon_logs."""
    c_site_checking_daemon_logs = getSiteDBCollection(
        connection, site_id, "site_checking_daemon_logs")
    c_site_checking_daemon_logs.drop_indexes()
    # BUG FIX: directions must be (key, direction) pairs; a bare second
    # positional argument is read by pymongo as cache_for.
    c_site_checking_daemon_logs.ensure_index([("created_on", -1)],
                                             background=True, unique=False)
    c_site_checking_daemon_logs.ensure_index([("checking_id", 1)],
                                             background=True, unique=True)
def fix_calculation_records(connection, site_id):
    """(Re)build the begin/end datetime indexes on calculation_records."""
    c_calculation_records = getSiteDBCollection(connection, site_id,
                                                "calculation_records")
    c_calculation_records.drop_indexes()
    # BUG FIX: -1 in the second positional slot is pymongo's cache_for
    # argument, not a direction; use (key, direction) pairs.
    c_calculation_records.ensure_index([("begin_datetime", -1)],
                                       background=True, unique=False)
    c_calculation_records.ensure_index([("end_datetime", -1)],
                                       background=True, unique=False)
def getSimilaritiesForItem(self, site_id, similarity_type, item_id):
    """Look up mostSimilarItems for item_id and filter via the black list."""
    coll_name = "item_similarities_%s" % similarity_type
    sims = getSiteDBCollection(self.connection, site_id, coll_name)
    doc = sims.find_one({"item_id": item_id},
                        read_preference=ReadPreference.SECONDARY_PREFERRED)
    if doc is None:
        return self.apply_black_list2topn(site_id, item_id, [])
    return self.apply_black_list2topn(site_id, item_id,
                                      doc["mostSimilarItems"])