Esempio n. 1
0
def generateEdmEmailingList(connection, site_id):
    """Rebuild the ``edm_emailing_list`` collection for one site.

    Collects the distinct ``user_id`` values found in ``user_orders`` (only
    orders within ``EMAILING_USER_ORDERS_MAX_DAY`` days of the latest order
    when a latest order datetime is available), drops the current
    ``edm_emailing_list`` collection and inserts one document per user for
    whom ``recommend_for_edm`` yields exactly
    ``EXPECTED_RECOMMENDATION_ITEMS`` items.

    Args:
        connection: Connection object passed to the site DB helpers and to
            ``MongoClient``.
        site_id: Identifier of the site whose emailing list is regenerated.
    """
    logger = logging.getLogger("EDMCalculations")
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        # No latest order datetime available: do not filter by date.
        query = {}
    else:
        earliest = latest_order_datetime - datetime.timedelta(
            days=EMAILING_USER_ORDERS_MAX_DAY)
        query = {"order_datetime": {"$gte": earliest}}
    db = getSiteDB(connection, site_id)
    # Pass the command name positionally so "distinct" is guaranteed to be the
    # first key of the command document; a plain dict gives no such ordering
    # guarantee on older Python versions.
    result = db.command("distinct", "user_orders", key="user_id", query=query)
    user_ids = result["values"]

    mongo_client = MongoClient(connection)
    getSiteDBCollection(connection, site_id, "edm_emailing_list").drop()
    # Fetch a fresh handle once the old collection has been dropped.
    c_edm_emailing_list = getSiteDBCollection(connection, site_id, "edm_emailing_list")
    t0 = time.time()
    for count, user_id in enumerate(user_ids, 1):
        if count % 100 == 0:
            elapsed = time.time() - t0
            # A coarse clock can report zero elapsed time; avoid dividing by it.
            rate = count / elapsed if elapsed > 0 else float("inf")
            logger.info("Count: %s, %s users/sec", count, rate)
        recommendation_result, _ = mongo_client.recommend_for_edm(
            site_id, user_id, max_amount=EXPECTED_RECOMMENDATION_ITEMS)
        # Only users with a full set of recommendations are stored.
        if len(recommendation_result) == EXPECTED_RECOMMENDATION_ITEMS:
            c_edm_emailing_list.insert({"user_id": user_id,
                                        "recommendation_result": recommendation_result})
Esempio n. 2
0
def getEmailingUsers(connection, site_id, page_num, page_size):
    """Return one page of distinct ordering users plus pagination metadata.

    The user list is the set of distinct ``user_id`` values in
    ``user_orders``; when a latest order datetime is available only orders
    within ``EMAILING_USER_ORDERS_MAX_DAY`` days of it are considered.

    Args:
        connection: Connection object passed to the site DB helpers.
        site_id: Identifier of the site to query.
        page_num: 1-based number of the page to return.
        page_size: Number of users per page.

    Returns:
        A dict with the selected users under ``"models"`` (each as
        ``{"user_id": ...}``) and the paging fields ``"page"``,
        ``"page_size"``, ``"total"``, ``"prev_page_num"``, ``"page_nums"``,
        ``"next_page_num"``, ``"max_page_num"``, ``"curr_left_reached"`` and
        ``"curr_right_reached"``.

    Raises:
        ZeroDivisionError: If ``page_size`` is 0.
    """
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        # No latest order datetime available: do not filter by date.
        query = {}
    else:
        earliest = latest_order_datetime - datetime.timedelta(days=EMAILING_USER_ORDERS_MAX_DAY)
        query = {"order_datetime": {"$gte": earliest}}
    db = getSiteDB(connection, site_id)
    # Pass the command name positionally so "distinct" is guaranteed to be the
    # first key of the command document; a plain dict gives no such ordering
    # guarantee on older Python versions.
    result = db.command("distinct", "user_orders", key="user_id", query=query)
    user_ids = result["values"]
    selected_user_ids = user_ids[(page_num - 1) * page_size : page_num * page_size]
    # divmod keeps the page count an integer on both Python 2 and Python 3
    # (a bare "/" yields a float on Python 3 and would break range() below).
    max_page_num, remainder = divmod(len(user_ids), page_size)
    if remainder > 0:
        max_page_num += 1
    # Show a window of up to ten page links starting at most four pages
    # before the current one.
    page_num_left = max(page_num - 4, 1)
    page_num_right = min(max_page_num, page_num_left + 9)
    models = [{"user_id": user_id} for user_id in selected_user_ids]
    return {
        "models": models,
        "page": page_num,
        "page_size": page_size,
        "total": len(user_ids),
        "prev_page_num": max(1, page_num - 1),
        # Materialize so the value is a list on Python 3 as well.
        "page_nums": list(range(page_num_left, page_num_right + 1)),
        "next_page_num": min(max_page_num, page_num + 1),
        "max_page_num": max_page_num,
        "curr_left_reached": page_num == 1,
        "curr_right_reached": page_num >= max_page_num,
    }
Esempio n. 3
0
def getEmailingUsers(connection, site_id, page_num, page_size):
    """Fetch a page of distinct ``user_orders`` user ids with paging info.

    Distinct ``user_id`` values are read from ``user_orders``. If a latest
    order datetime exists, only orders placed within
    ``EMAILING_USER_ORDERS_MAX_DAY`` days before it are taken into account.

    Args:
        connection: Connection object passed to the site DB helpers.
        site_id: Identifier of the site to query.
        page_num: 1-based number of the page to return.
        page_size: Number of users per page.

    Returns:
        A dict holding the page's users in ``"models"`` (each entry is
        ``{"user_id": ...}``) together with ``"page"``, ``"page_size"``,
        ``"total"``, ``"prev_page_num"``, ``"page_nums"``,
        ``"next_page_num"``, ``"max_page_num"``, ``"curr_left_reached"`` and
        ``"curr_right_reached"``.

    Raises:
        ZeroDivisionError: If ``page_size`` is 0.
    """
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        # Without a latest order datetime every order is considered.
        query = {}
    else:
        cutoff = latest_order_datetime \
            - datetime.timedelta(days=EMAILING_USER_ORDERS_MAX_DAY)
        query = {"order_datetime": {"$gte": cutoff}}
    db = getSiteDB(connection, site_id)
    # Name-plus-kwargs form guarantees "distinct" is the first key of the
    # command document, which a plain dict does not on older Pythons.
    result = db.command("distinct", "user_orders",
                        key="user_id", query=query)
    user_ids = result["values"]
    selected_user_ids = user_ids[(page_num - 1) * page_size:page_num * page_size]
    # Integer page count on both Python 2 and 3; "/" would produce a float on
    # Python 3 and make the range() call below fail.
    max_page_num, remainder = divmod(len(user_ids), page_size)
    if remainder > 0:
        max_page_num += 1
    # Window of at most ten page numbers, starting up to four pages before
    # the current page.
    page_num_left = max(page_num - 4, 1)
    page_num_right = min(max_page_num, page_num_left + 9)
    models = [{"user_id": user_id} for user_id in selected_user_ids]
    return {"models": models,
            "page": page_num,
            "page_size": page_size,
            "total": len(user_ids),
            "prev_page_num": max(1, page_num - 1),
            # list(...) keeps this a list on Python 3 too.
            "page_nums": list(range(page_num_left, page_num_right + 1)),
            "next_page_num": min(max_page_num, page_num + 1),
            "max_page_num": max_page_num,
            "curr_left_reached": page_num == 1,
            "curr_right_reached": page_num >= max_page_num}
Esempio n. 4
0
def generateEdmEmailingList(connection, site_id):
    """Regenerate the ``edm_emailing_list`` collection of a site.

    Distinct ``user_id`` values are read from ``user_orders`` (restricted to
    orders within ``EMAILING_USER_ORDERS_MAX_DAY`` days of the latest order
    when a latest order datetime exists). The existing
    ``edm_emailing_list`` collection is dropped, then a document is inserted
    for each user whose ``recommend_for_edm`` result contains exactly
    ``EXPECTED_RECOMMENDATION_ITEMS`` items.

    Args:
        connection: Connection object passed to the site DB helpers and to
            ``MongoClient``.
        site_id: Identifier of the site whose emailing list is regenerated.
    """
    logger = logging.getLogger("EDMCalculations")
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        # Without a latest order datetime every order is considered.
        query = {}
    else:
        cutoff = latest_order_datetime \
            - datetime.timedelta(days=EMAILING_USER_ORDERS_MAX_DAY)
        query = {"order_datetime": {"$gte": cutoff}}
    db = getSiteDB(connection, site_id)
    # Name-plus-kwargs form guarantees "distinct" is the first key of the
    # command document, which a plain dict does not on older Pythons.
    result = db.command("distinct", "user_orders",
                        key="user_id", query=query)
    user_ids = result["values"]

    mongo_client = MongoClient(connection)
    getSiteDBCollection(connection, site_id, "edm_emailing_list").drop()
    # Obtain a fresh handle after the previous collection was dropped.
    c_edm_emailing_list = getSiteDBCollection(connection, site_id,
                                              "edm_emailing_list")
    t0 = time.time()
    for count, user_id in enumerate(user_ids, 1):
        if count % 100 == 0:
            elapsed = time.time() - t0
            # Guard against a zero reading from a low-resolution clock.
            rate = count / elapsed if elapsed > 0 else float("inf")
            logger.info("Count: %s, %s users/sec", count, rate)
        recommendation_result, _ = mongo_client.recommend_for_edm(
            site_id, user_id, max_amount=EXPECTED_RECOMMENDATION_ITEMS)
        # Skip users that did not get the full number of recommendations.
        if len(recommendation_result) == EXPECTED_RECOMMENDATION_ITEMS:
            c_edm_emailing_list.insert({
                "user_id": user_id,
                "recommendation_result": recommendation_result,
            })
Esempio n. 5
0
# Command-line setup script: optionally resets a site's collections, ensures
# the raw_logs timestamp index exists, and registers/updates the site.
parser = OptionParser()
parser.add_option("-r", "--reset_db", dest="reset_db", help="reset all database of this site(use with caution)",
                  default="no")
parser.add_option("-i", "--site_id", dest="site_id", help="Site ID(required)", default=None)
parser.add_option("-n", "--site_name", dest="site_name", help="Site Name(required)", default=None)
parser.add_option("-c", "--calc_interval", dest="calc_interval", help="How long (in seconds) the server will update the calculation", default="43200")

(options, args) = parser.parse_args()

site_id, site_name = options.site_id, options.site_name
# NOTE(review): the help text marks site_id and site_name as required, but
# they are not validated here (earlier assert checks were disabled). Confirm
# whether missing values should be rejected, e.g. via parser.error().

# Open a single connection only after the options have been parsed, and share
# it between the direct collection operations and MongoClient.
# TODO: use dropDatabase?
connection = pymongo.Connection(settings.mongodb_host)
mongo_client = MongoClient(connection)
if options.reset_db == "yes":
    getSiteDBCollection(connection, site_id, "item_similarities").drop()
    getSiteDBCollection(connection, site_id, "raw_logs").drop()
    getSiteDBCollection(connection, site_id, "items").drop()
    # Recreate raw_logs explicitly after dropping it.
    getSiteDB(connection, site_id).create_collection("raw_logs", {})

# Descending index on timestamp for raw_logs.
getSiteDBCollection(connection, site_id, "raw_logs").ensure_index([("timestamp", -1)])

# calc_interval arrives as a string option; updateSite receives it as an int.
mongo_client.updateSite(site_id, site_name, int(options.calc_interval))

Esempio n. 6
0
 def getSiteDB(self, site_id):
     """Return the database for ``site_id`` using this object's connection.

     Delegates to the free ``getSiteDB`` function, supplying
     ``self.connection`` as its connection argument.
     """
     connection = self.connection
     return getSiteDB(connection, site_id)
Esempio n. 7
0
 def getSiteDB(self, site_id):
     """Look up the site database for ``site_id``.

     Thin wrapper: forwards ``self.connection`` and ``site_id`` to the free
     ``getSiteDB`` function and returns its result unchanged.
     """
     site_db = getSiteDB(self.connection, site_id)
     return site_db
Esempio n. 8
0
# Site name option; the help text marks it as required.
parser.add_option("-n", "--site_name", dest="site_name",
                  help="Site Name(required)", default=None)
# Recalculation interval in seconds, kept as a string until converted below.
parser.add_option("-c", "--calc_interval", dest="calc_interval",
                  help="How long (in seconds) the server will update the calculation",
                  default="43200")

options, args = parser.parse_args()

site_id = options.site_id
site_name = options.site_name
#assert site_id is not None
#assert site_name is not None

# TODO: use dropDatabase?
connection = pymongo.Connection(settings.mongodb_host)
if options.reset_db == "yes":
    # Drop the per-site collections in a fixed order, then recreate raw_logs.
    for _collection_name in ("item_similarities", "raw_logs", "items"):
        getSiteDBCollection(connection, site_id, _collection_name).drop()
    getSiteDB(connection, site_id).create_collection("raw_logs", {})

# Descending index on timestamp for raw_logs.
_raw_logs = getSiteDBCollection(connection, site_id, "raw_logs")
_raw_logs.ensure_index([("timestamp", -1)])

# The interval option is a string; updateSite receives it as an int.
mongo_client.updateSite(site_id, site_name, int(options.calc_interval))