Ejemplo n.º 1
0
def generateEdmEmailingList(connection, site_id):
    """Rebuild the ``edm_emailing_list`` collection of one site.

    Gathers the distinct ``user_id`` values found in ``user_orders`` (only
    orders within ``EMAILING_USER_ORDERS_MAX_DAY`` days of the latest order,
    when a latest order exists), drops the current ``edm_emailing_list``
    collection and inserts one document per user whose EDM recommendation
    contains exactly ``EXPECTED_RECOMMENDATION_ITEMS`` entries.
    """
    logger = logging.getLogger("EDMCalculations")
    # Looked up as in the original code; the handle is not used below.
    c_user_orders = getSiteDBCollection(connection, site_id, "user_orders")

    newest_order_at = getLatestUserOrderDatetime(connection, site_id)
    query = {}
    if newest_order_at is not None:
        query = {"order_datetime": {
            "$gte": newest_order_at - datetime.timedelta(days=EMAILING_USER_ORDERS_MAX_DAY)}}

    site_db = getSiteDB(connection, site_id)
    distinct_reply = site_db.command({"distinct": "user_orders", "key": "user_id",
                                      "query": query})
    user_ids = distinct_reply["values"]

    mongo_client = MongoClient(connection)
    # Start from an empty collection: drop it, then fetch a fresh handle.
    getSiteDBCollection(connection, site_id, "edm_emailing_list").drop()
    c_edm_emailing_list = getSiteDBCollection(connection, site_id, "edm_emailing_list")

    started_at = time.time()
    for processed, user_id in enumerate(user_ids, 1):
        # Progress report every 100 users.
        if processed % 100 == 0:
            logger.info("Count: %s, %s users/sec" % (processed, processed/(time.time() - started_at)))
        recommendation_result, _ = mongo_client.recommend_for_edm(
            site_id, user_id, max_amount=EXPECTED_RECOMMENDATION_ITEMS)
        # Users without a full set of recommendations are left out.
        if len(recommendation_result) != EXPECTED_RECOMMENDATION_ITEMS:
            continue
        c_edm_emailing_list.insert({"user_id": user_id, "recommendation_result": recommendation_result})
Ejemplo n.º 2
0
 def updateHotViewList(self, site_id, today=None):
     """Recompute and cache the ten most viewed items of a site.

     Sums the attributes named by ``getLast7DaysAttributeNames("v", today)``
     per item in ``traffic_metrics``, keeps the ten largest totals and stores
     ``(item_id, total_views / highest_views)`` pairs in ``cached_hot_view``
     under the ``HOT_INDEX_ALL_ITEMS`` type. ``today`` defaults to the
     current date.
     """
     if today is None:
         today = datetime.date.today()
     view_attr_names = self.getLast7DaysAttributeNames("v", today)
     c_traffic_metrics = getSiteDBCollection(self.connection, site_id, "traffic_metrics")
     pipeline = [
         {"$project": {"item_id": "$item_id",
                       "total_views": {"$add": view_attr_names}}},
         {"$sort": {"total_views": -1}},
         {"$limit": 10},
     ]
     top_records = c_traffic_metrics.aggregate(pipeline).get("result", [])
     # Scale against the best record, but never by less than 1.0.
     highest_views = max(1.0, float(top_records[0]["total_views"])) if top_records else 1.0
     result = []
     for record in top_records:
         result.append((record["item_id"], record["total_views"]/ highest_views))
     c_cached_hot_view = getSiteDBCollection(self.connection, site_id, "cached_hot_view")
     c_cached_hot_view.update({"type": HOT_INDEX_ALL_ITEMS},
                              {"type": HOT_INDEX_ALL_ITEMS, "result": result},
                              upsert=True)
Ejemplo n.º 3
0
 def updateUserPurchasingHistory(self, site_id, user_id):
     """Rebuild and save one user's purchasing history from the raw logs.

     Reads the user's most recent "PLO" raw logs (newest first, limited to
     ``MAX_PURCHASING_HISTORY_AMOUNT`` logs), collects the distinct item ids
     of their ``order_content`` in first-seen order and saves them as
     ``purchasing_history`` in the document returned by
     ``getPurchasingHistory``.

     The stored list holds at most ``MAX_PURCHASING_HISTORY_AMOUNT`` items.
     """
     # TODO: time consuming, defer to offline computing
     logging.critical("TODO: move offline updateUserPurchasingHistory - user_id: %s", user_id)
     max_items = self.MAX_PURCHASING_HISTORY_AMOUNT
     ph_in_db = self.getPurchasingHistory(site_id, user_id)
     c_raw_logs = getSiteDBCollection(self.connection, site_id, "raw_logs")
     cursor = c_raw_logs.find({"user_id": user_id, "behavior": "PLO"}).\
             sort("created_on", -1).limit(max_items)
     is_items_enough = False
     purchasing_history = []
     seen_item_ids = set()
     for record_PLO in cursor:
         for order_item in record_PLO["order_content"]:
             item_id = order_item["item_id"]
             if item_id not in seen_item_ids:
                 seen_item_ids.add(item_id)
                 purchasing_history.append(item_id)
             # ">=" stops exactly at the cap; the former ">" only stopped
             # after one item too many had already been appended.
             if len(purchasing_history) >= max_items:
                 is_items_enough = True
                 break
         if is_items_enough:
             break
     ph_in_db["purchasing_history"] = purchasing_history
     c_purchasing_history = getSiteDBCollection(self.connection, site_id, "purchasing_history")
     c_purchasing_history.save(ph_in_db)
Ejemplo n.º 4
0
 def updateUserPurchasingHistory(self, site_id, user_id):
     """Rebuild and save one user's purchasing history from the raw logs.

     Reads the user's most recent "PLO" raw logs (newest first, limited to
     ``MAX_PURCHASING_HISTORY_AMOUNT`` logs), collects the distinct item ids
     of their ``order_content`` in first-seen order and saves them as
     ``purchasing_history`` in the document returned by
     ``getPurchasingHistory``.

     The stored list holds at most ``MAX_PURCHASING_HISTORY_AMOUNT`` items.
     """
     # TODO: time consuming, defer to offline computing
     logging.critical(
         "TODO: move offline updateUserPurchasingHistory - user_id: %s",
         user_id)
     max_items = self.MAX_PURCHASING_HISTORY_AMOUNT
     ph_in_db = self.getPurchasingHistory(site_id, user_id)
     c_raw_logs = getSiteDBCollection(self.connection, site_id, "raw_logs")
     cursor = c_raw_logs.find({"user_id": user_id, "behavior": "PLO"}).\
             sort("created_on", -1).limit(max_items)
     is_items_enough = False
     purchasing_history = []
     seen_item_ids = set()
     for record_PLO in cursor:
         for order_item in record_PLO["order_content"]:
             item_id = order_item["item_id"]
             if item_id not in seen_item_ids:
                 seen_item_ids.add(item_id)
                 purchasing_history.append(item_id)
             # ">=" stops exactly at the cap; the former ">" only stopped
             # after one item too many had already been appended.
             if len(purchasing_history) >= max_items:
                 is_items_enough = True
                 break
         if is_items_enough:
             break
     ph_in_db["purchasing_history"] = purchasing_history
     c_purchasing_history = getSiteDBCollection(self.connection, site_id,
                                                "purchasing_history")
     c_purchasing_history.save(ph_in_db)
Ejemplo n.º 5
0
 def recommend_for_edm(self, site_id, user_id, max_amount=5):
     """Build EDM recommendations from the user's most recent order.

     Takes the items of the raw log behind the user's latest ``user_orders``
     entry, computes a weighted top list for them (excluding the user's
     purchasing history) and returns whatever ``convertTopNFormat`` yields
     for at most ``max_amount`` items.

     Raises IndexError when the user has no entry in ``user_orders``.
     """
     def keep_item_link(item_link, site_id, item_id, req_id, ref):
         # Item links are passed through unchanged.
         return item_link

     c_user_orders = getSiteDBCollection(self.connection, site_id,
                                         "user_orders")
     c_raw_logs = getSiteDBCollection(self.connection, site_id, "raw_logs")
     newest_first = c_user_orders.find({"user_id": user_id})
     newest_first = newest_first.sort("order_datetime", -1).limit(1)
     latest_user_order = list(newest_first)[0]
     raw_log = c_raw_logs.find_one({"_id": latest_user_order["raw_log_id"]})
     items_list = []
     for order_item in raw_log["order_content"]:
         items_list.append(order_item["item_id"])
     user_history = self.getPurchasingHistory(site_id, user_id)
     purchasing_history = user_history["purchasing_history"]
     topn = self.calc_weighted_top_list_method1(
         site_id, "PLO", items_list, extra_excludes_list=purchasing_history)
     ref = "ref=edm"  # to trace source in edm (not referenced below)
     return self.convertTopNFormat(
         site_id,
         req_id=None,
         result_filter=SimpleRecommendationResultFilter(),
         topn=topn,
         amount=max_amount,
         include_item_info=True,
         deduplicate_item_names_required=True,
         url_converter=keep_item_link)
Ejemplo n.º 6
0
 def __init__(self, connection, site_id, last_ts, output_file_path):
     """Store the constructor arguments and open the collections used later.

     Keeps ``connection``, ``site_id``, ``last_ts`` and ``output_file_path``
     on the instance, looks up the site's ``raw_logs`` and
     ``tmp_user_identified_logs_plo`` collections, and starts with an empty
     ``tjbid2user`` mapping.
     """
     # Plain constructor arguments.
     self.connection = connection
     self.site_id = site_id
     self.last_ts = last_ts
     self.output_file_path = output_file_path
     # Collection handles for this site.
     self.raw_logs = utils.getSiteDBCollection(connection, site_id, "raw_logs")
     self.c_tmp_user_identified_logs_plo = utils.getSiteDBCollection(
         connection, site_id, "tmp_user_identified_logs_plo")
     # Starts empty.
     self.tjbid2user = dict()
Ejemplo n.º 7
0
 def __init__(self, connection, site_id, last_ts, output_file_path):
     """Remember the job parameters and resolve the site's collections.

     ``raw_logs`` and ``c_tmp_user_identified_logs_plo`` are handles obtained
     through ``utils.getSiteDBCollection``; ``tjbid2user`` starts out as an
     empty dict.
     """
     self.tjbid2user = {}
     self.output_file_path = output_file_path
     self.last_ts = last_ts
     self.site_id = site_id
     self.connection = connection

     def site_collection(collection_name):
         return utils.getSiteDBCollection(connection, site_id, collection_name)

     self.raw_logs = site_collection("raw_logs")
     self.c_tmp_user_identified_logs_plo = site_collection(
         "tmp_user_identified_logs_plo")
Ejemplo n.º 8
0
 def recommend_for_edm(self, site_id, user_id, max_amount=5):
     """Recommend items for an EDM mail, seeded by the user's latest order.

     The seed items come from the raw log referenced by the newest
     ``user_orders`` entry of the user; items in the user's purchasing
     history are excluded from the weighted top list. The return value is
     the result of ``convertTopNFormat`` for at most ``max_amount`` items.

     Raises IndexError when the user has no entry in ``user_orders``.
     """
     c_user_orders = getSiteDBCollection(self.connection, site_id, "user_orders")
     c_raw_logs = getSiteDBCollection(self.connection, site_id, "raw_logs")
     orders_newest_first = c_user_orders.find({"user_id": user_id}).sort("order_datetime", -1).limit(1)
     latest_user_order = [user_order for user_order in orders_newest_first][0]
     raw_log = c_raw_logs.find_one({"_id": latest_user_order["raw_log_id"]})
     items_list = []
     for order_item in raw_log["order_content"]:
         items_list.append(order_item["item_id"])
     already_bought = self.getPurchasingHistory(site_id, user_id)["purchasing_history"]
     topn = self.calc_weighted_top_list_method1(site_id, "PLO", items_list,
                                                extra_excludes_list=already_bought)
     ref = "ref=edm" # to trace source in edm (not referenced below)
     conversion_options = dict(
         req_id=None,
         result_filter=SimpleRecommendationResultFilter(),
         topn=topn,
         amount=max_amount,
         include_item_info=True,
         deduplicate_item_names_required=True,
         # Item links are passed through unchanged.
         url_converter=lambda item_link, site_id, item_id, req_id, ref: item_link,
     )
     return self.convertTopNFormat(site_id, **conversion_options)
Ejemplo n.º 9
0
def fix_user_orders(connection, site_id):
    """Drop and recreate the indexes of the site's ``user_orders`` collection.

    After dropping all indexes, ensures a non-unique descending index on
    ``order_datetime``, built in the background.
    """
    c_user_orders = getSiteDBCollection(connection, site_id, "user_orders")
    c_user_orders.drop_indexes()
    # The direction has to be part of the key list. In PyMongo 2.x the second
    # positional parameter of ensure_index() is ``cache_for``, so the former
    # ensure_index("order_datetime", -1, ...) never asked for a descending
    # index.
    c_user_orders.ensure_index([("order_datetime", -1)],
                               background=True,
                               unique=False)
Ejemplo n.º 10
0
def getEmailingUsers(connection, site_id, page_num, page_size):
    """Return one page of users with recent orders plus paging metadata.

    The candidate users are the distinct ``user_id`` values of
    ``user_orders`` (only orders within ``EMAILING_USER_ORDERS_MAX_DAY`` days
    of the latest order, when a latest order exists).

    Returns a dict with the page's ``models`` (``{"user_id": ...}`` dicts),
    the requested ``page``/``page_size``, the ``total`` number of users and
    the values a pager needs: ``prev_page_num``, ``next_page_num``,
    ``page_nums``, ``max_page_num``, ``curr_left_reached`` and
    ``curr_right_reached``. ``page_num`` is 1-based.
    """
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        query = {}
    else:
        query = {
            "order_datetime": {"$gte": latest_order_datetime - datetime.timedelta(days=EMAILING_USER_ORDERS_MAX_DAY)}
        }
    db = getSiteDB(connection, site_id)
    result = db.command({"distinct": "user_orders", "key": "user_id", "query": query})
    user_ids = result["values"]
    selected_user_ids = user_ids[(page_num - 1) * page_size : page_num * page_size]
    # divmod() floors on every Python version; a plain "/" turns the page
    # count into a float as soon as true division is in effect.
    max_page_num, remainder = divmod(len(user_ids), page_size)
    if remainder > 0:
        max_page_num += 1
    # Show up to four pages to the left and fill a ten-page window rightwards.
    page_num_left = max(page_num - 4, 1)
    page_num_right = min(max_page_num, page_num + (9 - (page_num - page_num_left)))
    models = [{"user_id": user_id} for user_id in selected_user_ids]
    return {
        "models": models,
        "page": page_num,
        "page_size": page_size,
        "total": len(user_ids),
        "prev_page_num": max(1, page_num - 1),
        # Always a real list, regardless of what range() returns.
        "page_nums": list(range(page_num_left, page_num_right + 1)),
        "next_page_num": min(max_page_num, page_num + 1),
        "max_page_num": max_page_num,
        "curr_left_reached": page_num == 1,
        "curr_right_reached": page_num >= max_page_num,
    }
Ejemplo n.º 11
0
def updateRecord(connection, site_id, item_view_times_map, last_item_id1,
                 last_rows):
    """Upsert the ``viewed_ultimately_buys`` record of one viewed item.

    ``last_rows`` holds ``(item_id2, count)`` pairs; it is sorted in place by
    descending count. Each pair is stored with its ``percentage``, i.e.
    ``count`` divided by the total views of ``last_item_id1`` taken from
    ``item_view_times_map``.
    """
    # key/reverse gives the same stable, descending-by-count order as the
    # former cmp function, which list.sort() no longer accepts on Python 3.
    last_rows.sort(key=lambda row: row[1], reverse=True)

    item1_total_views = item_view_times_map[last_item_id1]
    content_dict = {
        "item_id": last_item_id1,
        "total_views": item1_total_views,
        "viewedUltimatelyBuys": []
    }
    for item_id2, count in last_rows:
        content_dict["viewedUltimatelyBuys"].append({
            "item_id": item_id2,
            "count": count,
            # NOTE(review): with two ints this floors on Python 2 unless the
            # module enables true division - confirm the operand types.
            "percentage": count / item1_total_views
        })

    c_viewed_ultimately_buys = getSiteDBCollection(connection, site_id,
                                                   "viewed_ultimately_buys")
    c_viewed_ultimately_buys.update({"item_id": last_item_id1},
                                    content_dict,
                                    upsert=True)
Ejemplo n.º 12
0
def ajax_item(request, api_key, item_id):
    """Return a JSON ``HttpResponse`` describing one item of a site.

    The site is looked up by ``api_key`` after ``_checkUserAccessSite`` has
    been called for the session's user. The payload carries the item's basic
    fields, its category ids joined by commas and the recommendation lists
    (also viewed, also bought, bought together, ultimately bought) together
    with the item's black list, whose entries are flagged ``is_black``.
    """
    user_name = request.session.get("user_name", None)
    _checkUserAccessSite(user_name, api_key)
    connection = mongo_client.connection
    site = connection["tjb-db"]["sites"].find_one({"api_key": api_key})
    items_collection = getSiteDBCollection(connection, site["site_id"], "items")
    item = items_collection.find_one({"item_id": item_id})
    black_item_ids = mongo_client.get_black_list(site['site_id'], item_id)
    black_list = itemInfoListFromItemIdList(site['site_id'], black_item_ids)
    for black_item in black_list:
        black_item['is_black'] = True
    category_ids = [category["id"] for category in item["categories"]]
    item_categories = ",".join(category_ids)
    data = {
        'item_id': item['item_id'],
        'item_name': item['item_name'],
        'item_link': item['item_link'],
        'item_categories': item_categories,
        'market_price': item.get('market_price', ''),
        'price': item.get('price', ''),
        'image_link': item.get('image_link', ''),
        'available': item['available'],
        'rec_lists': {
            "also_viewed": _getTopnByAPI(site, "AlsoViewed", item_id, 15),
            "also_bought": _getTopnByAPI(site, "AlsoBought", item_id, 15),
            "bought_together": _getTopnByAPI(site, "BoughtTogether", item_id, 15),
            "ultimately_bought": _getUltimatelyBought(site, item_id, 15),
            "black_list": black_list,
        },
    }
    payload = json.dumps(data)
    return HttpResponse(payload)
Ejemplo n.º 13
0
def getEmailingUsers(connection, site_id, page_num, page_size):
    """Return one page of users with recent orders plus paging metadata.

    The candidate users are the distinct ``user_id`` values of
    ``user_orders`` (only orders within ``EMAILING_USER_ORDERS_MAX_DAY`` days
    of the latest order, when a latest order exists).

    Returns a dict with the page's ``models`` (``{"user_id": ...}`` dicts),
    the requested ``page``/``page_size``, the ``total`` number of users and
    the values a pager needs: ``prev_page_num``, ``next_page_num``,
    ``page_nums``, ``max_page_num``, ``curr_left_reached`` and
    ``curr_right_reached``. ``page_num`` is 1-based.
    """
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        query = {}
    else:
        query = {"order_datetime": {"$gte": latest_order_datetime \
                                - datetime.timedelta(days=EMAILING_USER_ORDERS_MAX_DAY)}}
    db = getSiteDB(connection, site_id)
    result = db.command({"distinct": "user_orders", "key": "user_id",
                "query": query})
    user_ids = result["values"]
    selected_user_ids = user_ids[(page_num - 1) * page_size:page_num * page_size]
    # divmod() floors on every Python version; a plain "/" turns the page
    # count into a float as soon as true division is in effect.
    max_page_num, remainder = divmod(len(user_ids), page_size)
    if remainder > 0:
        max_page_num += 1
    # Show up to four pages to the left and fill a ten-page window rightwards.
    page_num_left = max(page_num - 4, 1)
    page_num_right = min(max_page_num, page_num + (9 - (page_num - page_num_left)))
    models = [{"user_id": user_id} for user_id in selected_user_ids]
    return {"models": models,
            "page": page_num,
            "page_size": page_size,
            "total": len(user_ids),
            "prev_page_num": max(1, page_num - 1),
            # Always a real list, regardless of what range() returns.
            "page_nums": list(range(page_num_left, page_num_right + 1)),
            "next_page_num": min(max_page_num, page_num + 1),
            "max_page_num": max_page_num,
            "curr_left_reached": page_num == 1,
            "curr_right_reached": page_num >= max_page_num}
Ejemplo n.º 14
0
def ajax_item(request, api_key, item_id):
    """Return a JSON ``HttpResponse`` describing one item of a site.

    The site is resolved from ``api_key`` after ``_checkUserAccessSite`` has
    been called for the session's user. The payload contains the item's
    basic fields, its categories joined by commas, and the recommendation
    lists plus the item's black list (entries flagged ``is_black``).
    """
    session_user = request.session.get("user_name", None)
    _checkUserAccessSite(session_user, api_key)
    connection = mongo_client.connection
    site = connection["tjb-db"]["sites"].find_one({"api_key": api_key})
    item = getSiteDBCollection(connection, site["site_id"], "items").find_one({"item_id": item_id})
    black_item_ids = mongo_client.get_black_list(site["site_id"], item_id)
    black_list = itemInfoListFromItemIdList(site["site_id"], black_item_ids)
    for black_item in black_list:
        black_item["is_black"] = True
    data = {
        "item_id": item["item_id"],
        "item_name": item["item_name"],
        "item_link": item["item_link"],
        "item_categories": ",".join(item["categories"]),
        "market_price": item.get("market_price", ""),
        "price": item.get("price", ""),
        "image_link": item["image_link"],
        "available": item["available"],
        "rec_lists": {
            "also_viewed": _getTopnByAPI(site, "getAlsoViewed", item_id, 15),
            "also_bought": _getTopnByAPI(site, "getAlsoBought", item_id, 15),
            "bought_together": _getTopnByAPI(site, "getBoughtTogether", item_id, 15),
            "ultimately_bought": _getUltimatelyBought(site, item_id, 15),
            "black_list": black_list,
        },
    }
    body = json.dumps(data)
    return HttpResponse(body)
Ejemplo n.º 15
0
 def getHotViewList(self, site_id):
     """Return the cached ``HOT_INDEX_ALL_ITEMS`` hot-view result, or ``[]`` when nothing is cached."""
     c_cached_hot_view = getSiteDBCollection(self.connection, site_id, "cached_hot_view")
     cache_doc = c_cached_hot_view.find_one({"type": HOT_INDEX_ALL_ITEMS})
     if not cache_doc:
         return []
     return cache_doc["result"]
Ejemplo n.º 16
0
def insertUserOrderFromRawLog(connection, site_id, raw_log):
    """Insert a ``user_orders`` document derived from one raw order log.

    The order ``amount`` is the sum of ``price * amount`` over the entries
    of ``raw_log["order_content"]``; the document also records the log's
    ``filled_user_id``, ``created_on`` and ``_id``.
    """
    c_user_orders = getSiteDBCollection(connection, site_id, "user_orders")
    order_total = sum(float(order_item["price"]) * int(order_item["amount"])
                      for order_item in raw_log["order_content"])
    user_order = {
        "user_id": raw_log["filled_user_id"],
        "order_datetime": raw_log["created_on"],
        "raw_log_id": raw_log["_id"],
        "amount": order_total,
    }
    c_user_orders.insert(user_order)
Ejemplo n.º 17
0
def getEdmEmailingUsers(connection, site_id, page_num, page_size):
    """Return the requested page of the site's ``edm_emailing_list``.

    Paging is delegated to ``_getModelsByPages``; each record is reduced to
    a ``{"user_id": ...}`` dict.
    """
    def record_processor(record):
        # Only the user id is exposed.
        return {"user_id": record["user_id"]}

    c_edm_emailing_list = getSiteDBCollection(connection, site_id,
                                              "edm_emailing_list")
    all_records = c_edm_emailing_list.find()
    return _getModelsByPages(connection, site_id, page_num, page_size,
                             all_records, record_processor)
Ejemplo n.º 18
0
 def get_black_list(self, site_id, item_id):
     """Return the stored ``black_list`` of ``item_id``, or ``[]`` when the item has no ``rec_black_lists`` entry."""
     c_rec_black_lists = getSiteDBCollection(self.connection, site_id, "rec_black_lists")
     entry = c_rec_black_lists.find_one({"item_id": item_id})
     if entry is not None:
         return entry["black_list"]
     return []
Ejemplo n.º 19
0
def ajax_item(request, api_key, item_id):
    """Return a JSON ``HttpResponse`` describing one item of a site.

    The site is found by ``api_key`` once ``_checkUserAccessSite`` has been
    called for the session's user. Besides the item's basic fields and its
    comma-joined category ids, the payload includes the recommendation lists
    and the item's black list, each black-list entry flagged ``is_black``.
    """
    user_name = request.session.get("user_name", None)
    _checkUserAccessSite(user_name, api_key)
    connection = mongo_client.connection
    c_sites = connection["tjb-db"]["sites"]
    site = c_sites.find_one({"api_key": api_key})
    c_items = getSiteDBCollection(connection, site["site_id"], "items")
    item = c_items.find_one({"item_id": item_id})
    black_item_ids = mongo_client.get_black_list(site['site_id'], item_id)
    black_list = itemInfoListFromItemIdList(site['site_id'], black_item_ids)
    for black_item in black_list:
        black_item['is_black'] = True
    category_ids = []
    for category in item["categories"]:
        category_ids.append(category["id"])
    item_categories = ",".join(category_ids)
    data = {
        'item_id': item['item_id'],
        'item_name': item['item_name'],
        'item_link': item['item_link'],
        'item_categories': item_categories,
        'market_price': item.get('market_price', ''),
        'price': item.get('price', ''),
        'image_link': item.get('image_link', ''),
        'available': item['available'],
        'rec_lists': {
            "also_viewed": _getTopnByAPI(site, "AlsoViewed", item_id, 15),
            "also_bought": _getTopnByAPI(site, "AlsoBought", item_id, 15),
            "bought_together": _getTopnByAPI(site, "BoughtTogether", item_id, 15),
            "ultimately_bought": _getUltimatelyBought(site, item_id, 15),
            "black_list": black_list,
        },
    }
    serialized = json.dumps(data)
    return HttpResponse(serialized)
Ejemplo n.º 20
0
 def removeItem(self, site_id, item_id):
     """Mark an item as removed instead of deleting it.

     When the item exists in ``items`` it is saved back with ``available``
     set to False and ``removed_on`` set to the current local time; unknown
     items are ignored.
     """
     c_items = getSiteDBCollection(self.connection, site_id, "items")
     stored_item = c_items.find_one({"item_id": item_id})
     if stored_item is None:
         return
     stored_item["available"] = False
     stored_item["removed_on"] = datetime.datetime.now()
     c_items.save(stored_item)
Ejemplo n.º 21
0
 def updateSearchTermsCache(self, site_id, cache_entry):
     """Upsert ``cache_entry`` into ``search_terms_cache`` keyed by its terms.

     The key is the entry's ``terms`` joined with ``"|"``; it is also written
     into ``cache_entry["terms_key"]``, so the passed dict is modified.
     """
     # NOTE(review): the terms are joined in the order given, while
     # fetchSearchTermsCacheEntry sorts before joining - presumably callers
     # pass already-sorted terms; confirm.
     key = "|".join(cache_entry["terms"])
     cache_entry["terms_key"] = key
     c_search_terms_cache = getSiteDBCollection(self.connection, site_id, "search_terms_cache")
     c_search_terms_cache.update({"terms_key": key}, cache_entry, upsert=True)
Ejemplo n.º 22
0
 def fetchSearchTermsCacheEntry(self, site_id, terms):
     """Look up the cache entry of a list of search terms.

     The terms are sorted (the caller's list is left untouched) and joined
     with ``"|"`` to form the key. Returns ``(terms_key, cache_entry)``;
     ``cache_entry`` is whatever ``find_one`` returns for that key.
     """
     c_search_terms_cache = getSiteDBCollection(self.connection, site_id, "search_terms_cache")
     terms_key = "|".join(sorted(terms))
     return terms_key, c_search_terms_cache.find_one({"terms_key": terms_key})
Ejemplo n.º 23
0
 def calculateKeywordHotViewList(self, site_id, today=None):
     """Compute the hot keyword list of a site.

     Sums the attributes named by ``getLast7DaysAttributeNames("k", today)``
     per ``keyword_metrics`` document, groups the sums by keyword and keeps
     the 50 largest positive totals. Keywords whose total reaches
     ``settings.MINIMAL_KEYWORD_HOT_VIEW_COUNT`` form the list.

     Returns ``{"null": keywords}``; the list is empty when fewer than
     ``settings.MINIMAL_KEYWORD_HOT_VIEW_LENGTH`` keywords qualify.
     ``today`` defaults to the current date.
     """
     # Imported here so the block does not rely on a module-level import.
     import logging

     if today is None:
         today = datetime.date.today()
     last_7_days_attr_names = self.getLast7DaysAttributeNames("k", today)
     c_keyword_metrics = getSiteDBCollection(self.connection, site_id, "keyword_metrics")
     res = c_keyword_metrics.aggregate(
         [
         {"$project": {
             "keyword": 1,
             "count": {"$add": last_7_days_attr_names}
         }
         },
         {"$group": {
             "_id": "$keyword",
             "count": {"$sum": "$count"}
             }
         },
         {"$match": {"count": {"$gt": 0}}},
         {"$sort": {"count": -1}},
         {"$limit": 50}
         ]
     )
     result = res.get("result", [])
     # Former debug "print" statement: now a debug-level log record, so
     # nothing is written to stdout.
     logging.debug("RES: %s", result)
     topn = [record["_id"] for record in result if record["count"] >= settings.MINIMAL_KEYWORD_HOT_VIEW_COUNT]
     if len(topn) >= settings.MINIMAL_KEYWORD_HOT_VIEW_LENGTH:
         return {"null": topn}
     else:
         return {"null": []}
Ejemplo n.º 24
0
 def getHotViewList(self, site_id, hot_index_type, category_id=None, brand=None):
     """Return the cached hot-view result for the given index type, category and brand, or ``[]`` when none is cached."""
     c_cached_hot_view = getSiteDBCollection(self.connection, site_id, "cached_hot_view")
     selector = {"hot_index_type": hot_index_type, "category_id": category_id, "brand": brand}
     cache_doc = c_cached_hot_view.find_one(selector)
     return cache_doc["result"] if cache_doc else []
Ejemplo n.º 25
0
 def getBrowsingHistory(self, site_id, ptm_id):
     """Return the ``browsing_history`` of the visitor with ``ptm_id``, or ``[]`` for an unknown visitor."""
     c_visitors = getSiteDBCollection(self.connection, site_id, "visitors")
     visitor_doc = c_visitors.find_one({"ptm_id": ptm_id})
     if not visitor_doc:
         return []
     return visitor_doc["browsing_history"]
Ejemplo n.º 26
0
 def getBrowsingHistory(self, site_id, ptm_id):
     """Fetch a visitor's browsing history.

     Returns the ``browsing_history`` field of the ``visitors`` document
     matching ``ptm_id``; an empty list when no such document is found.
     """
     visitor = getSiteDBCollection(self.connection, site_id, "visitors").find_one({"ptm_id": ptm_id})
     return visitor["browsing_history"] if visitor else []
Ejemplo n.º 27
0
 def removeItem(self, site_id, item_id):
     """Flag an existing item as unavailable and stamp its removal time.

     Items that are not found in ``items`` are left alone. Found items are
     saved back with ``available=False`` and ``removed_on`` set to the
     current local time.
     """
     items = getSiteDBCollection(self.connection, site_id, "items")
     found = items.find_one({"item_id": item_id})
     if found is None:
         return
     found["available"] = False
     found["removed_on"] = datetime.datetime.now()
     items.save(found)
Ejemplo n.º 28
0
 def updateTrafficMetricsFromLog(self, site_id, raw_log):
     """Increment the per-item traffic counters affected by one raw log.

     A "V" log bumps the item's ``v`` counters and a "PLO" log bumps the
     ``b`` counters of every item in ``order_content``; in both cases the
     year, month, day and hour buckets of the log's ``created_on`` are
     incremented by one (upserting the item's document). Other behaviors
     are ignored.
     """
     c_traffic_metrics = getSiteDBCollection(self.connection, site_id, "traffic_metrics")
     behavior = raw_log.get("behavior", None)
     created_on = raw_log["created_on"]
     year = created_on.year
     month = created_on.month
     day = created_on.day
     hour = created_on.hour

     if behavior == "V":
         item_id = raw_log["item_id"]
         view_increments = {
             "v.%d.v" % year: 1,
             "v.%d.%d.v" % (year, month): 1,
             "v.%d.%d.%d.v" % (year, month, day): 1,
             "v.%d.%d.%d.%d.v" % (year, month, day, hour): 1,
         }
         c_traffic_metrics.update({"item_id": item_id},
                                  {"$inc": view_increments},
                                  upsert=True)
         return

     if behavior != "PLO":
         return

     for order_row in raw_log["order_content"]:
         item_id = order_row["item_id"]
         purchase_increments = {
             "b.%d.b" % year: 1,
             "b.%d.%d.b" % (year, month): 1,
             "b.%d.%d.%d.b" % (year, month, day): 1,
             "b.%d.%d.%d.%d.b" % (year, month, day, hour): 1,
         }
         c_traffic_metrics.update({"item_id": item_id},
                                  {"$inc": purchase_increments},
                                  upsert=True)
Ejemplo n.º 29
0
Archivo: views.py Proyecto: stipid/poco
def getSiteInfos():
    """Return a status summary dict for every site from ``mongo_client.loadSites()``.

    Each summary starts with ``site_id``, ``site_name`` and
    ``disabledFlows``, is passed through ``fillSiteCheckingDaemonInfo`` and
    then gains:

    * ``status``: "NEVER_CALC" without any calculation record, otherwise
      "RUNNING" (latest record has no ``end_datetime``), "SUCCESSFUL" or
      "FAILED"; plus ``last_calculation_id``, ``time_spent`` and, for
      finished runs, ``since_last`` and ``est_next_run``;
    * ``request_waiting_time`` when a manual calculation request exists;
    * ``all_items_count`` and ``available_items_count``.
    """
    connection = mongo_client.connection
    sites = mongo_client.loadSites()
    now = datetime.datetime.now()
    no_delay = datetime.timedelta(seconds=0)
    # The same collection serves every site, so resolve it once.
    manual_calculation_list = connection["tjb-db"]["manual_calculation_list"]
    result = []
    for site in sites:
        sci = {"site_id": site["site_id"], "site_name": site["site_name"],
               "disabledFlows": site.get("disabledFlows", [])}
        fillSiteCheckingDaemonInfo(connection, sci)
        calculation_records = getSiteDBCollection(connection, site["site_id"],
                                    "calculation_records")
        # Only the most recently started calculation matters.
        records = list(calculation_records.find().sort("begin_datetime", -1).limit(1))
        if not records:
            sci["status"] = "NEVER_CALC"
        else:
            record = records[0]
            sci["last_calculation_id"] = record["calculation_id"]
            # "in" replaces dict.has_key(), which no longer exists on Python 3.
            if "end_datetime" in record:
                if record["is_successful"]:
                    sci["status"] = "SUCCESSFUL"
                else:
                    sci["status"] = "FAILED"
                sci["since_last"] = convertTimedeltaAsDaysHoursMinutesSeconds(now - record["end_datetime"])
                sci["time_spent"] = convertTimedeltaAsDaysHoursMinutesSeconds(record["end_datetime"] - record["begin_datetime"])
                # Time left until the next scheduled run, clamped at zero.
                est_next_run = max(record["end_datetime"] + datetime.timedelta(seconds=site["calc_interval"]) - now,
                                   no_delay)
                if est_next_run == no_delay:
                    sci["est_next_run"] = "as soon as possible"
                else:
                    sci["est_next_run"] = convertTimedeltaAsDaysHoursMinutesSeconds(est_next_run)
            else:
                sci["status"] = "RUNNING"
                sci["time_spent"] = convertTimedeltaAsDaysHoursMinutesSeconds(now - record["begin_datetime"])

        manual_calculation_request = manual_calculation_list.find_one({"site_id": site["site_id"]})
        if manual_calculation_request is not None:
            request_datetime = manual_calculation_request["request_datetime"]
            sci["request_waiting_time"] = convertTimedeltaAsDaysHoursMinutesSeconds(now - request_datetime)

        c_items = getSiteDBCollection(connection, site["site_id"], "items")
        sci["all_items_count"] = c_items.find().count()
        sci["available_items_count"] = c_items.find({"available": True}).count()

        result.append(sci)

    return result
Ejemplo n.º 30
0
 def getSimilaritiesForItems(self, site_id, similarity_type, item_ids):
     """Return the similarity rows of several items with black lists applied.

     Reads the rows of ``item_similarities_<similarity_type>`` whose
     ``item_id`` is in ``item_ids`` and replaces each row's
     ``mostSimilarItems`` with the output of ``apply_black_list2topn``.
     """
     collection_name = "item_similarities_%s" % similarity_type
     c_item_similarities = getSiteDBCollection(self.connection, site_id, collection_name)
     rows = []
     for row in c_item_similarities.find({"item_id": {"$in": item_ids}}):
         filtered = self.apply_black_list2topn(site_id, row["item_id"], row["mostSimilarItems"])
         row["mostSimilarItems"] = filtered
         rows.append(row)
     return rows
Ejemplo n.º 31
0
def fix_purchasing_history(connection, site_id):
    """Drop and recreate the indexes of the site's ``purchasing_history`` collection.

    After dropping all indexes, ensures a unique ascending index on
    ``user_id``, built in the background.
    """
    c_purchasing_history = getSiteDBCollection(connection, site_id,
                                               "purchasing_history")
    c_purchasing_history.drop_indexes()
    # The direction belongs in the key list as an integer. The former string
    # "1" was passed positionally, where PyMongo 2.x expects ``cache_for``
    # rather than a direction.
    c_purchasing_history.ensure_index([("user_id", 1)],
                                      background=True,
                                      unique=True)
Ejemplo n.º 32
0
 def fetchSearchTermsCacheEntry(self, site_id, terms):
     """Return ``(terms_key, cache_entry)`` for a list of search terms.

     ``terms_key`` is the sorted terms joined with ``"|"`` (the caller's
     list is not modified); ``cache_entry`` is the ``search_terms_cache``
     document stored under that key, as returned by ``find_one``.
     """
     ordered_terms = sorted(terms)
     terms_key = "|".join(ordered_terms)
     c_search_terms_cache = getSiteDBCollection(self.connection, site_id,
                                                "search_terms_cache")
     cache_entry = c_search_terms_cache.find_one({"terms_key": terms_key})
     return terms_key, cache_entry
Ejemplo n.º 33
0
 def updateSearchTermsCache(self, site_id, cache_entry):
     """Store ``cache_entry`` in ``search_terms_cache`` under its terms key.

     The key (the entry's ``terms`` joined with ``"|"``) is written into
     ``cache_entry["terms_key"]`` - the passed dict is modified - and the
     entry is upserted on that key.
     """
     # NOTE(review): the terms are joined in the order given, while
     # fetchSearchTermsCacheEntry sorts before joining - presumably callers
     # pass already-sorted terms; confirm.
     cache_entry["terms_key"] = "|".join(cache_entry["terms"])
     selector = {"terms_key": cache_entry["terms_key"]}
     c_search_terms_cache = getSiteDBCollection(self.connection, site_id,
                                                "search_terms_cache")
     c_search_terms_cache.update(selector, cache_entry, upsert=True)
Ejemplo n.º 34
0
def fix_viewed_ultimately_buys(connection, site_id):
    """Drop and recreate the indexes of the site's ``viewed_ultimately_buys`` collection.

    After dropping all indexes, ensures a unique ascending index on
    ``item_id``, built in the background.
    """
    c_viewed_ultimately_buys = getSiteDBCollection(connection, site_id,
                                                   "viewed_ultimately_buys")
    c_viewed_ultimately_buys.drop_indexes()
    # The direction is given in the key list; passed positionally it would
    # land in PyMongo 2.x's ``cache_for`` parameter instead.
    c_viewed_ultimately_buys.ensure_index([("item_id", 1)],
                                          background=True,
                                          unique=True)
Ejemplo n.º 35
0
def itemInfoListFromItemIdList(site_id, item_id_list):
    """Return basic info dicts for the items whose ids are in ``item_id_list``.

    Each dict holds the ``item_id``, ``item_name``, ``item_link`` and
    ``image_link`` fields present on the stored item; the ``_id`` field is
    removed. Ids without a matching item are simply absent from the result.
    """
    c_items = getSiteDBCollection(mongo_client.connection, site_id, "items")
    # A projection includes a field with 1/True. The former '' for
    # "image_link" is not an inclusion flag and is not interpreted the same
    # way by all MongoDB server versions.
    projection = {"item_id": 1, "item_name": 1, "item_link": 1, "image_link": 1}
    item_info_list = list(c_items.find({"item_id": {"$in": item_id_list}}, projection))
    for item_info in item_info_list:
        del item_info["_id"]
    return item_info_list
Ejemplo n.º 36
0
 def get_black_list(self, site_id, item_id):
     """Return the black list stored for ``item_id``.

     Reads the ``black_list`` field of the item's ``rec_black_lists``
     document; an item without such a document yields an empty list.
     """
     c_rec_black_lists = getSiteDBCollection(self.connection, site_id,
                                             "rec_black_lists")
     stored = c_rec_black_lists.find_one({"item_id": item_id})
     return [] if stored is None else stored["black_list"]
Ejemplo n.º 37
0
def fix_items(connection, site_id):
    """Drop and recreate the indexes of the site's ``items`` collection.

    After dropping all indexes, ensures (all built in the background):
    a non-unique index on ``item_name``, a unique index on ``item_id``, and
    non-unique descending and ascending indexes on both ``created_on`` and
    ``removed_on``.
    """
    c_items = getSiteDBCollection(connection, site_id, "items")
    c_items.drop_indexes()
    # Directions are given in the key list. Passed positionally they land in
    # PyMongo 2.x's ``cache_for`` parameter, which made each -1/+1 pair below
    # request the same default ascending key.
    c_items.ensure_index([("item_name", 1)], background=True, unique=False)
    c_items.ensure_index([("item_id", 1)], background=True, unique=True)#, drop_dups=True)
    c_items.ensure_index([("created_on", -1)], background=True, unique=False)
    c_items.ensure_index([("created_on", 1)], background=True, unique=False)
    c_items.ensure_index([("removed_on", -1)], background=True, unique=False)
    c_items.ensure_index([("removed_on", 1)], background=True, unique=False)
Ejemplo n.º 38
0
def fix_item_similarities_collections(connection, site_id):
    """Drop and recreate the indexes of the site's item-similarity collections.

    For each of ``item_similarities_V``, ``item_similarities_PLO`` and
    ``item_similarities_BuyTogether``: drops all indexes, then ensures a
    unique ascending index on ``item_id``, built in the background.
    """
    for similarity_type in ("V", "PLO", "BuyTogether"):
        c_item_similarities = getSiteDBCollection(
            connection, site_id, "item_similarities_%s" % similarity_type)
        c_item_similarities.drop_indexes()
        # The direction is given in the key list; passed positionally it
        # would land in PyMongo 2.x's ``cache_for`` parameter instead.
        c_item_similarities.ensure_index([("item_id", 1)],
                                         background=True,
                                         unique=True)
Ejemplo n.º 39
0
 def getSimilaritiesForItem(self, site_id, similarity_type, item_id):
     """Return the most similar items of ``item_id`` with its black list applied.

     The stored ``mostSimilarItems`` (an empty list when the item has no
     row in ``item_similarities_<similarity_type>``) is passed through
     ``apply_black_list2topn``. The lookup is issued with the
     ``SECONDARY_PREFERRED`` read preference.
     """
     collection_name = "item_similarities_%s" % similarity_type
     item_similarities = getSiteDBCollection(self.connection, site_id, collection_name)
     stored = item_similarities.find_one({"item_id": item_id}, read_preference=ReadPreference.SECONDARY_PREFERRED)
     topn = [] if stored is None else stored["mostSimilarItems"]
     return self.apply_black_list2topn(site_id, item_id, topn)
Ejemplo n.º 40
0
 def toggle_black_list(self, site_id, item_id1, item_id2, is_on):
     """Add ``item_id2`` to, or remove it from, the black list of ``item_id1``.

     A ``rec_black_lists`` document with an empty ``black_list`` is inserted
     first when ``item_id1`` has none. A truthy ``is_on`` adds the id with
     ``$addToSet``; otherwise it is removed with ``$pull``.
     """
     c_rec_black_lists = getSiteDBCollection(self.connection, site_id, "rec_black_lists")
     if c_rec_black_lists.find_one({"item_id": item_id1}) is None:
         c_rec_black_lists.insert({"item_id": item_id1, "black_list": []})
     list_operator = "$addToSet" if is_on else "$pull"
     c_rec_black_lists.update({"item_id": item_id1},
                              {list_operator: {"black_list": item_id2}})
Ejemplo n.º 41
0
 def updateCategory(self, site_id, category):
     """Insert or replace the stored document of a category.

     When a document with the same ``category_id`` exists, only its ``_id``
     is kept and every other field comes from ``category``; otherwise a new
     document is saved.
     """
     c_categories = getSiteDBCollection(self.connection, site_id, "categories")
     existing = c_categories.find_one({"category_id": category["category_id"]})
     document = {} if existing is None else {"_id": existing["_id"]}
     document.update(category)
     c_categories.save(document)
Ejemplo n.º 42
0
def createCalculationRecord(site_id):
    """Store a fresh calculation record for ``site_id`` and return its id.

    The record carries a random UUID4 string as ``calculation_id``, the
    current local time as ``begin_datetime`` and an empty ``flows`` dict.

    NOTE(review): ``connection`` is not a parameter; it is resolved from the
    enclosing scope -- presumably a module-level connection, confirm.
    """
    new_id = str(uuid.uuid4())
    started_at = datetime.datetime.now()
    collection = getSiteDBCollection(connection, site_id, "calculation_records")
    collection.save({
        "calculation_id": new_id,
        "begin_datetime": started_at,
        "flows": {},
    })
    return new_id
Ejemplo n.º 43
0
 def getPurchasingHistory(self, site_id, user_id):
     """Return the ``purchasing_history`` document for ``user_id``.

     The lookup uses a secondary-preferred read.  When nothing is stored,
     a new dict with the user id and an empty ``purchasing_history`` list
     is returned instead (it is not written to the database here).
     """
     collection = getSiteDBCollection(
         self.connection, site_id, "purchasing_history")
     stored = collection.find_one(
         {"user_id": user_id},
         read_preference=ReadPreference.SECONDARY_PREFERRED)
     if stored is not None:
         return stored
     return {"user_id": user_id, "purchasing_history": []}
Ejemplo n.º 44
0
 def getSimilaritiesForItems(self, site_id, similarity_type, item_ids):
     """Return the similarity documents for every id in ``item_ids``.

     Each document found in ``item_similarities_<similarity_type>`` has its
     ``mostSimilarItems`` field replaced by the result of
     ``self.apply_black_list2topn`` before being collected.  Documents are
     returned in cursor order; ids without a document are simply absent.
     """
     collection = getSiteDBCollection(
         self.connection, site_id,
         "item_similarities_%s" % similarity_type)
     rows = []
     for row in collection.find({"item_id": {"$in": item_ids}}):
         filtered = self.apply_black_list2topn(
             site_id, row["item_id"], row["mostSimilarItems"])
         row["mostSimilarItems"] = filtered
         rows.append(row)
     return rows
Ejemplo n.º 45
0
def doUpdateUserOrdersCollection(connection, site_id):
    """Bring the site's user orders up to date from its raw logs.

    Two passes are made:

    1. Every ``raw_logs`` entry with behavior ``"PLO"`` created after the
       latest known order datetime (all of them when there is none) is
       turned into a user order, provided it has a ``filled_user_id`` that
       does not start with ``"ANO_"``.
    2. Every entry of ``tmp_user_identified_logs_plo`` is resolved to its
       raw log via ``log_id``, turned into a user order, and then removed
       from the temporary collection.
    """
    c_raw_logs = getSiteDBCollection(connection, site_id, "raw_logs")
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    query_condition = {"behavior": "PLO"}
    if latest_order_datetime is not None:
        query_condition["created_on"] = {"$gt": latest_order_datetime}
    # Scan for and add new user_orders.
    # NOTE: sort by "created_on" so logs are scanned oldest to newest;
    # otherwise some logs would be missed on the next run if this process
    # fails half way through.
    for raw_log in c_raw_logs.find(query_condition).sort("created_on", 1):
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if ("filled_user_id" in raw_log
                and not raw_log["filled_user_id"].startswith("ANO_")):
            insertUserOrderFromRawLog(connection, site_id, raw_log)
    # Process raw logs that were previously filled with an "ANO_" user id
    # and have since been identified as belonging to a registered user.
    c_tmp_user_identified_logs_plo = getSiteDBCollection(
        connection, site_id, "tmp_user_identified_logs_plo")
    for tmp_user_identified_log_plo in c_tmp_user_identified_logs_plo.find():
        # NOTE(review): find_one returns None when the referenced raw log is
        # missing, which would make the insert below fail -- confirm whether
        # that situation can occur.
        raw_log = c_raw_logs.find_one(
            {"_id": tmp_user_identified_log_plo["log_id"]})
        insertUserOrderFromRawLog(connection, site_id, raw_log)
        c_tmp_user_identified_logs_plo.remove(
            {"_id": tmp_user_identified_log_plo["_id"]})
Ejemplo n.º 46
0
 def getSimilaritiesForViewedUltimatelyBuy(self, site_id, item_id):
     """Return ``(item_id, percentage)`` pairs recorded for ``item_id``.

     The pairs come from the ``viewedUltimatelyBuys`` field of the matching
     ``viewed_ultimately_buys`` document (secondary-preferred read); an
     absent document yields no pairs.  The list is handed to
     ``self.apply_black_list2topn`` and that result is returned.
     """
     collection = getSiteDBCollection(
         self.connection, site_id, "viewed_ultimately_buys")
     row = collection.find_one(
         {"item_id": item_id},
         read_preference=ReadPreference.SECONDARY_PREFERRED)
     entries = row["viewedUltimatelyBuys"] if row is not None else []
     pairs = []
     for entry in entries:
         pairs.append((entry["item_id"], entry["percentage"]))
     return self.apply_black_list2topn(site_id, item_id, pairs)
Ejemplo n.º 47
0
def upload_statistics(site_id, connection, client, data):
    """Merge ``data`` into the statistics row for its date and save it.

    ``data["date_str"]`` selects the row (matched on the ``date`` field); a
    new row is started when none exists.  All remaining keys of ``data``
    overwrite the row's values of the same name.

    Note that ``"date_str"`` is removed from ``data`` in place, so the
    caller's dict is modified.  A missing ``"date_str"`` raises ``KeyError``.
    ``client`` is not used by this function.
    """
    c_statistics = getSiteDBCollection(connection, site_id, "statistics")
    # pop() reads and removes the key in one step; it still raises KeyError
    # when the key is missing.
    date_str = data.pop("date_str")
    row_in_db = c_statistics.find_one({"date": date_str})
    if row_in_db is None:
        row_in_db = {"date": date_str}
    # One update merges every key; repeating it once per key was redundant.
    row_in_db.update(data)
    c_statistics.save(row_in_db)
Ejemplo n.º 48
0
def fix_items(connection, site_id):
    """Drop and re-create the indexes of the site's ``items`` collection.

    After ``drop_indexes`` the fields ``item_name``, ``item_id``,
    ``created_on`` (twice) and ``removed_on`` (twice) are passed to
    ``ensure_index`` with ``background=True``; only ``item_id`` is unique.

    NOTE(review): the ``1`` / ``-1`` values are forwarded as the second
    positional argument of ``ensure_index``.  In PyMongo 2.x that parameter
    appears to be ``cache_for`` rather than a sort direction, so the paired
    ``-1`` / ``1`` calls may not create distinct indexes -- confirm intent.
    """
    # (field, second positional argument, unique) in the original call order.
    index_calls = (
        ("item_name", 1, False),
        ("item_id", 1, True),  # drop_dups=True was left disabled upstream
        ("created_on", -1, False),
        ("created_on", 1, False),
        ("removed_on", -1, False),
        ("removed_on", 1, False),
    )
    items = getSiteDBCollection(connection, site_id, "items")
    items.drop_indexes()
    for field, second_arg, is_unique in index_calls:
        items.ensure_index(
            field, second_arg, background=True, unique=is_unique)
Ejemplo n.º 49
0
 def updateProperty(self, site_id, property):
     """Save ``property`` into the site's ``properties`` collection.

     The collection is searched by the pair ``property["id"]`` and
     ``property["type"]``.  When a document is found, only its ``_id`` is
     reused; the fields of ``property`` are laid on top and the result is
     written with ``save``.
     """
     collection = getSiteDBCollection(self.connection, site_id, "properties")
     selector = {"id": property["id"], 'type': property['type']}
     existing = collection.find_one(selector)
     document = {} if existing is None else {"_id": existing["_id"]}
     document.update(property)
     collection.save(document)
Ejemplo n.º 50
0
def upload_statistics(site_id, connection, client, data):
    """Merge ``data`` into the statistics row for its date and save it.

    ``data["date_str"]`` selects the row (matched on the ``date`` field); a
    new row is started when none exists.  All remaining keys of ``data``
    overwrite the row's values of the same name.

    Note that ``"date_str"`` is removed from ``data`` in place, so the
    caller's dict is modified.  A missing ``"date_str"`` raises ``KeyError``.
    ``client`` is not used by this function.
    """
    c_statistics = getSiteDBCollection(connection, site_id, "statistics")
    # pop() reads and removes the key in one step; it still raises KeyError
    # when the key is missing.
    date_str = data.pop("date_str")
    row_in_db = c_statistics.find_one({"date": date_str})
    if row_in_db is None:
        row_in_db = {"date": date_str}
    # One update merges every key; repeating it once per key was redundant.
    row_in_db.update(data)
    c_statistics.save(row_in_db)
Ejemplo n.º 51
0
 def getProperty(self, site_id, property_type, property_id):
     """Return the ``properties`` document with the given type and id.

     The lookup uses a secondary-preferred read; the result of ``find_one``
     is returned unchanged.
     """
     selector = {"type": property_type, "id": property_id}
     collection = getSiteDBCollection(self.connection, site_id, "properties")
     return collection.find_one(
         selector, read_preference=ReadPreference.SECONDARY_PREFERRED)
Ejemplo n.º 52
0
def dashboard(request):
    """Render the dashboard index page for the logged-in user.

    The user name is read from the session, the user's sites are fetched
    with ``_getUserSites`` and each site dict gets an ``items_count`` entry
    holding the number of its items with ``available`` set to true.
    """
    user_name = request.session["user_name"]
    sites = _getUserSites(user_name)
    connection = mongo_client.connection
    for site in sites:
        available_items = getSiteDBCollection(
            connection, site['site_id'], "items").find({"available": True})
        site['items_count'] = available_items.count()
    template_values = {
        "page_name": "控制台首页",
        "sites": sites,
        "user_name": user_name,
    }
    return render_to_response(
        "dashboard/index.html",
        template_values,
        context_instance=RequestContext(request))
Ejemplo n.º 53
0
def itemInfoListFromItemIdList(site_id, item_id_list):
    """Return display info for the items whose ids are in ``item_id_list``.

    Each returned dict holds the projected fields ``item_id``,
    ``item_name``, ``item_link`` and ``image_link`` (those present on the
    stored item); ``_id`` is stripped.  Results follow cursor order, not
    the order of ``item_id_list``, and unknown ids are simply absent.
    """
    c_items = getSiteDBCollection(mongo_client.connection, site_id, "items")
    # Use 1 for every included field.  The previous "" for image_link is
    # treated as inclusion by older servers, but MongoDB 4.4+ projects a
    # non-numeric, non-boolean literal as the field's new value, which
    # would blank out every image link.
    projection = {"item_id": 1, "item_name": 1, "item_link": 1, "image_link": 1}
    item_info_list = list(
        c_items.find({"item_id": {"$in": item_id_list}}, projection))
    for item_info in item_info_list:
        del item_info["_id"]
    return item_info_list
Ejemplo n.º 54
0
 def updateCategory(self, site_id, category):
     """Write ``category`` to the site's ``categories`` collection.

     A stored document with the same ``category_id`` contributes only its
     ``_id``; every other field of the saved document comes from
     ``category``.  Without a stored match the saved document starts empty.
     """
     categories = getSiteDBCollection(self.connection, site_id, "categories")
     found = categories.find_one({"category_id": category["category_id"]})
     to_save = {"_id": found["_id"]} if found is not None else {}
     to_save.update(category)
     categories.save(to_save)
Ejemplo n.º 55
0
def createCalculationRecord(site_id):
    """Save a new calculation record for ``site_id`` and return its id.

    The id is a random UUID4 rendered as a string.  The record also stores
    the current local time under ``begin_datetime`` and an empty dict under
    ``flows``.

    NOTE(review): ``connection`` is not a parameter; it is looked up in the
    enclosing scope -- presumably a module-level connection, confirm.
    """
    record_id = str(uuid.uuid4())
    begin = datetime.datetime.now()
    records = getSiteDBCollection(connection, site_id, "calculation_records")
    records.save(
        {"calculation_id": record_id, "begin_datetime": begin, "flows": {}})
    return record_id
Ejemplo n.º 56
0
def insertUserOrderFromRawLog(connection, site_id, raw_log):
    """Insert one ``user_orders`` document derived from ``raw_log``.

    The order amount is the sum over ``raw_log["order_content"]`` of
    ``float(price) * int(amount)``.  The inserted document records the
    log's ``filled_user_id``, ``created_on`` and ``_id`` alongside it.
    """
    user_orders = getSiteDBCollection(connection, site_id, "user_orders")
    order_total = sum(
        float(line["price"]) * int(line["amount"])
        for line in raw_log["order_content"])
    user_order = {
        "user_id": raw_log["filled_user_id"],
        "order_datetime": raw_log["created_on"],
        "raw_log_id": raw_log["_id"],
        "amount": order_total,
    }
    user_orders.insert(user_order)
Ejemplo n.º 57
0
def fix_site_checking_daemon_logs(connection, site_id):
    """Drop and re-create the ``site_checking_daemon_logs`` indexes.

    After ``drop_indexes``, a non-unique index on ``created_on`` and a
    unique index on ``checking_id`` are ensured with ``background=True``.

    NOTE(review): the ``-1`` / ``1`` values are forwarded as the second
    positional argument of ``ensure_index``; in PyMongo 2.x that parameter
    appears to be ``cache_for`` rather than a sort direction -- confirm.
    """
    logs = getSiteDBCollection(
        connection, site_id, "site_checking_daemon_logs")
    logs.drop_indexes()
    # (field, second positional argument, unique) in the original call order.
    for field, second_arg, is_unique in (("created_on", -1, False),
                                         ("checking_id", 1, True)):
        logs.ensure_index(
            field, second_arg, background=True, unique=is_unique)
Ejemplo n.º 58
0
def fix_calculation_records(connection, site_id):
    """Drop and re-create the ``calculation_records`` indexes.

    After ``drop_indexes``, non-unique indexes on ``begin_datetime`` and
    ``end_datetime`` are ensured with ``background=True``.

    NOTE(review): the ``-1`` is forwarded as the second positional argument
    of ``ensure_index``; in PyMongo 2.x that parameter appears to be
    ``cache_for`` rather than a sort direction -- confirm intent.
    """
    records = getSiteDBCollection(connection, site_id, "calculation_records")
    records.drop_indexes()
    for field in ("begin_datetime", "end_datetime"):
        records.ensure_index(field, -1, background=True, unique=False)
Ejemplo n.º 59
0
 def getSimilaritiesForItem(self, site_id, similarity_type, item_id):
     """Look up the similar-item list stored for ``item_id``.

     The ``mostSimilarItems`` value of the matching document in
     ``item_similarities_<similarity_type>`` is used (secondary-preferred
     read), or an empty list when there is no such document.  The list is
     passed to ``self.apply_black_list2topn`` and its result is returned.
     """
     collection = getSiteDBCollection(
         self.connection, site_id, "item_similarities_%s" % similarity_type)
     match = collection.find_one(
         {"item_id": item_id},
         read_preference=ReadPreference.SECONDARY_PREFERRED)
     candidates = match["mostSimilarItems"] if match is not None else []
     return self.apply_black_list2topn(site_id, item_id, candidates)