Exemple #1
0
 def _process(self, site_id, args):
     """Validate the request arguments and store a category for `site_id`.

     Returns ``{"code": 1, "err_msg": ...}`` when ``self.ap.processArgs``
     reports an error.  Otherwise ``parent_categories`` is normalised to a
     list, the arguments are handed to ``mongo_client.updateCategory`` and
     ``{"code": 0}`` is returned.
     """
     error, params = self.ap.processArgs(args)
     if error:
         return {"code": 1, "err_msg": error}

     raw_parents = params["parent_categories"]
     # None becomes an empty list; any other value goes through
     # smart_split with "," as the separator.
     params["parent_categories"] = (
         [] if raw_parents is None else smart_split(raw_parents, ","))

     mongo_client.updateCategory(site_id, params)
     return {"code": 0}
Exemple #2
0
 def _process(self, site_id, args):
     """Handle a category update request for the given site.

     The raw arguments are run through ``self.ap.processArgs``.  On a
     validation error the result is ``{"code": 1, "err_msg": <message>}``;
     on success the category is written via ``mongo_client.updateCategory``
     and ``{"code": 0}`` is returned.
     """
     err_msg, args = self.ap.processArgs(args)
     if err_msg:
         return {"code": 1, "err_msg": err_msg}

     # Always store a list: empty when nothing was supplied, otherwise the
     # result of smart_split on the supplied value.
     parents = args["parent_categories"]
     if parents is not None:
         parents = smart_split(parents, ",")
     else:
         parents = []
     args["parent_categories"] = parents

     mongo_client.updateCategory(site_id, args)
     return {"code": 0}
Exemple #3
0
 def _process(self, site_id, args):
     """Validate the request arguments and store an item for `site_id`.

     Returns ``{"code": 1, "err_msg": ...}`` when ``self.ap.processArgs``
     reports an error.  Otherwise fields whose value is None are removed
     from the arguments, ``categories`` is normalised to a list, the result
     is passed to ``mongo_client.updateItem`` and ``{"code": 0}`` is
     returned.
     """
     error, params = self.ap.processArgs(args)
     if error:
         return {"code": 1, "err_msg": error}

     # Fields left as None are dropped so they are not sent to updateItem.
     for field in ("description", "image_link", "price", "market_price"):
         if params[field] is None:
             del params[field]

     raw_categories = params["categories"]
     if raw_categories is None:
         params["categories"] = []
     else:
         params["categories"] = smart_split(raw_categories, ",")

     if params["item_group"] is None:
         del params["item_group"]

     mongo_client.updateItem(site_id, params)
     return {"code": 0}
Exemple #4
0
 def _process(self, site_id, args):
     """Handle an item update request for the given site.

     The raw arguments are run through ``self.ap.processArgs``.  On a
     validation error the result is ``{"code": 1, "err_msg": <message>}``.
     On success, None-valued fields are stripped, ``categories`` becomes a
     list, the item is written via ``mongo_client.updateItem`` and
     ``{"code": 0}`` is returned.
     """
     err_msg, args = self.ap.processArgs(args)
     if err_msg:
         return {"code": 1, "err_msg": err_msg}

     def discard_if_unset(key):
         # Remove `key` from the processed arguments when its value is None.
         if args[key] is None:
             del args[key]

     discard_if_unset("description")
     discard_if_unset("image_link")
     discard_if_unset("price")
     discard_if_unset("market_price")

     categories = args["categories"]
     args["categories"] = (
         [] if categories is None else smart_split(categories, ","))

     discard_if_unset("item_group")

     mongo_client.updateItem(site_id, args)
     return {"code": 0}
Exemple #5
0
        for user_id in user_ids.keys():
            mongo_client.updateUserPurchasingHistory(site_id, user_id)
        print "updated for %s" % site_id
    print "=======================\n"


    print "Fix item categories"
    for site in connection["tjb-db"]["sites"].find():
        site_id = site["site_id"]
        print "Work on %s" % site_id
        c_items = getSiteDBCollection(connection, site_id, "items")
        for item_row in c_items.find():
            categories = item_row.get("categories", None)
            if isinstance(categories, basestring):
                c_items.update({"item_id": item_row["item_id"]}, 
                        {"$set": {"categories": smart_split(categories, ",")}})
            elif categories is None:
                c_items.update({"item_id": item_row["item_id"]}, 
                        {"$set": {"categories": []}})
    print "=======================\n"


    print "Fix sites"
    from Adminboard.site_utils import generateApiKey
    sites = connection["tjb-db"]["sites"]
    for site in sites.find():
        if not site.has_key("api_key"):
            api_key = generateApiKey(connection, site["site_id"], site["site_name"])
            sites.update({"site_id": site["site_id"]},
                        {"$set": {"api_key": api_key}})
        calc_interval = site.get("calc_interval", None)
def calc_order_items_with_rec_info(site_id, connection, client):
    """Rebuild the Hive table ``order_items_with_rec_info`` and a monthly CSV table.

    Steps, all issued through ``client.execute``:

    1. Drop and re-create ``order_items_with_rec_info``.
    2. Fill it with the distinct 'PLO' rows of ``backfilled_raw_logs``,
       left-joined to ``rec_buy`` and ``recommendation_logs`` so each order
       item carries the recommendation that led to it (if any).
    3. Read the greatest ``date_str`` in the table and, when one exists,
       re-create ``csv_<YYYYMM>`` holding the de-duplicated recommended
       order items of that month.

    Args:
        site_id: not used by this function; kept for the caller's signature.
        connection: not used by this function; kept for the caller's signature.
        client: object providing ``execute(sql)`` and ``fetchOne()``.

    Returns:
        None.

    Raises:
        AssertionError: if the fetched ``date_str`` is not 10 characters long.
    """
    # TODO:
    #   There are duplicated orders in raw_logs, there are order_id with
    #   different uniq_order_id.  The duplication will be kept in this hive
    #   table.  But will be de-duplicated in the table for csv dump.
    client.execute("DROP TABLE   order_items_with_rec_info")
    client.execute("CREATE TABLE order_items_with_rec_info ( "
                     "created_on DOUBLE, "
                     "date_str STRING, "
                     "hour INT, "
                     "uniq_order_id STRING, "
                     "order_id STRING, "
                     "user_id STRING, "
                     "tjbid STRING, "
                     "item_id STRING,"
                     "price FLOAT, "
                     "amount INT, "
                     "src_item_id STRING, "
                     "src_behavior STRING, "
                     "src_created_on STRING, "
                     "src_date_str STRING, "
                     "src_hour INT, "
                     "is_rec_item BOOLEAN "
                     ")"
                     "ROW FORMAT DELIMITED "
                     "FIELDS TERMINATED BY ',' "
                     "STORED AS TEXTFILE")

    # The last selected column (`rl.behavior IS NOT NULL`) populates
    # is_rec_item: it is true only when a recommendation log row matched.
    client.execute("INSERT OVERWRITE TABLE order_items_with_rec_info "
                   "  SELECT oi.created_on, oi.date_str, oi.hour, "
                   "         oi.uniq_order_id, oi.order_id, oi.user_id, oi.tjbid, "
                   "         oi.item_id, oi.price, oi.amount, "
                   "         rl.item_id AS src_item_id, "
                   "         rl.behavior AS src_behavior, "
                   "         rl.created_on AS src_created_on, "
                   "         rl.date_str AS src_date_str, "
                   "         rl.hour AS src_hour, "
                   "         rl.behavior IS NOT NULL "
                   "  FROM "
                   "   (SELECT DISTINCT brl.created_on, brl.date_str, brl.hour, brl.uniq_order_id, brl.order_id, brl.filled_user_id as user_id, "
                   "    brl.tjbid, brl.item_id, brl.price, brl.amount "
                   "    FROM rec_buy rb1 "
                   "    RIGHT OUTER JOIN backfilled_raw_logs brl ON (rb1.uniq_order_id = brl.uniq_order_id AND rb1.item_id = brl.item_id) "
                   "    WHERE brl.behavior = 'PLO' "
                   "   ) oi"
                   "    LEFT OUTER JOIN rec_buy rb "
                   "      ON rb.uniq_order_id = oi.uniq_order_id and rb.item_id = oi.item_id "
                   "    LEFT OUTER JOIN recommendation_logs rl "
                   "      ON rl.req_id = rb.src_req_id"
                  )

    client.execute("SELECT oi.date_str FROM order_items_with_rec_info oi SORT BY oi.date_str DESC limit 1")

    row = client.fetchOne()
    if row is None or row == '':
        # No row came back (presumably the table is empty): nothing to export.
        return

    data = result_as_dict(smart_split(row, "\t"), ["date_str"])
    date_str = data["date_str"]
    # Sanity check: the slicing below expects a 10-character value
    # (looks like YYYY-MM-DD -- confirm against the log format).
    assert len(date_str) == 10, "unexpected date_str: %r" % (date_str,)
    month = date_str[0:7]
    month_ = month.replace('-', '')

    client.execute("drop table csv_%s" % (month_))
    client.execute("create table csv_%s row format delimited fields terminated by ','"
      " lines terminated by '\n' as "
      "select distinct user_id, order_id, item_id, price, amount, date_str, hour, src_item_id, src_behavior, src_date_str, src_hour "
      "from order_items_with_rec_info where is_rec_item = true and date_str like '%%%s%%'" % (month_, month))
def yieldClientResults(client):
    """Yield the rows of the client's current result set, one list per row.

    Rows are pulled one at a time with ``client.fetchOne()`` and passed
    through ``smart_split(row, "\t")``.  Iteration stops at the first row
    that is None or an empty string.

    Args:
        client: object providing ``fetchOne()``.

    Yields:
        Whatever ``smart_split`` returns for each fetched row.
    """
    while True:
        row = client.fetchOne()
        # Compare against the None singleton by identity, not equality.
        if row is None or row == '':
            return
        yield smart_split(row, "\t")
def calc_order_items_with_rec_info(site_id, connection, client):
    """Rebuild the Hive table ``order_items_with_rec_info`` and a monthly CSV table.

    Steps, all issued through ``client.execute``:

    1. Drop and re-create ``order_items_with_rec_info``.
    2. Fill it with the distinct 'PLO' rows of ``backfilled_raw_logs``,
       left-joined to ``rec_buy`` and ``recommendation_logs`` so each order
       item carries the recommendation that led to it (if any).
    3. Read the greatest ``date_str`` in the table and, when one exists,
       re-create ``csv_<YYYYMM>`` holding the de-duplicated recommended
       order items of that month.

    Args:
        site_id: not used by this function; kept for the caller's signature.
        connection: not used by this function; kept for the caller's signature.
        client: object providing ``execute(sql)`` and ``fetchOne()``.

    Returns:
        None.

    Raises:
        AssertionError: if the fetched ``date_str`` is not 10 characters long.
    """
    # TODO:
    #   There are duplicated orders in raw_logs, there are order_id with
    #   different uniq_order_id.  The duplication will be kept in this hive
    #   table.  But will be de-duplicated in the table for csv dump.
    client.execute("DROP TABLE   order_items_with_rec_info")
    client.execute("CREATE TABLE order_items_with_rec_info ( "
                     "created_on DOUBLE, "
                     "date_str STRING, "
                     "hour INT, "
                     "uniq_order_id STRING, "
                     "order_id STRING, "
                     "user_id STRING, "
                     "tjbid STRING, "
                     "item_id STRING,"
                     "price FLOAT, "
                     "amount INT, "
                     "src_item_id STRING, "
                     "src_behavior STRING, "
                     "src_created_on STRING, "
                     "src_date_str STRING, "
                     "src_hour INT, "
                     "is_rec_item BOOLEAN "
                     ")"
                     "ROW FORMAT DELIMITED "
                     "FIELDS TERMINATED BY ',' "
                     "STORED AS TEXTFILE")

    # The last selected column (`rl.behavior IS NOT NULL`) populates
    # is_rec_item: it is true only when a recommendation log row matched.
    client.execute("INSERT OVERWRITE TABLE order_items_with_rec_info "
                   "  SELECT oi.created_on, oi.date_str, oi.hour, "
                   "         oi.uniq_order_id, oi.order_id, oi.user_id, oi.tjbid, "
                   "         oi.item_id, oi.price, oi.amount, "
                   "         rl.item_id AS src_item_id, "
                   "         rl.behavior AS src_behavior, "
                   "         rl.created_on AS src_created_on, "
                   "         rl.date_str AS src_date_str, "
                   "         rl.hour AS src_hour, "
                   "         rl.behavior IS NOT NULL "
                   "  FROM "
                   "   (SELECT DISTINCT brl.created_on, brl.date_str, brl.hour, brl.uniq_order_id, brl.order_id, brl.filled_user_id as user_id, "
                   "    brl.tjbid, brl.item_id, brl.price, brl.amount "
                   "    FROM rec_buy rb1 "
                   "    RIGHT OUTER JOIN backfilled_raw_logs brl ON (rb1.uniq_order_id = brl.uniq_order_id AND rb1.item_id = brl.item_id) "
                   "    WHERE brl.behavior = 'PLO' "
                   "   ) oi"
                   "    LEFT OUTER JOIN rec_buy rb "
                   "      ON rb.uniq_order_id = oi.uniq_order_id and rb.item_id = oi.item_id "
                   "    LEFT OUTER JOIN recommendation_logs rl "
                   "      ON rl.req_id = rb.src_req_id"
                  )

    client.execute("SELECT oi.date_str FROM order_items_with_rec_info oi SORT BY oi.date_str DESC limit 1")

    row = client.fetchOne()
    if row is None or row == '':
        # No row came back (presumably the table is empty): nothing to export.
        return

    data = result_as_dict(smart_split(row, "\t"), ["date_str"])
    date_str = data["date_str"]
    # Sanity check: the slicing below expects a 10-character value
    # (looks like YYYY-MM-DD -- confirm against the log format).
    assert len(date_str) == 10, "unexpected date_str: %r" % (date_str,)
    month = date_str[0:7]
    month_ = month.replace('-', '')

    client.execute("drop table csv_%s" % (month_))
    client.execute("create table csv_%s row format delimited fields terminated by ','"
      " lines terminated by '\n' as "
      "select distinct user_id, order_id, item_id, price, amount, date_str, hour, src_item_id, src_behavior, src_date_str, src_hour "
      "from order_items_with_rec_info where is_rec_item = true and date_str like '%%%s%%'" % (month_, month))
def yieldClientResults(client):
    """Yield the rows of the client's current result set, one list per row.

    Rows are pulled one at a time with ``client.fetchOne()`` and passed
    through ``smart_split(row, "\t")``.  Iteration stops at the first row
    that is None or an empty string.

    Args:
        client: object providing ``fetchOne()``.

    Yields:
        Whatever ``smart_split`` returns for each fetched row.
    """
    while True:
        row = client.fetchOne()
        # Compare against the None singleton by identity, not equality.
        if row is None or row == '':
            return
        yield smart_split(row, "\t")