def _process(self, site_id, args):
    """Validate the request args and update a category record.

    Returns {"code": 0} on success, or {"code": 1, "err_msg": ...} when
    argument processing fails. "parent_categories" is normalized from a
    comma-separated string (or None) into a list before the update.
    """
    err_msg, args = self.ap.processArgs(args)
    if err_msg:
        # Validation failed — report the error without touching the DB.
        return {"code": 1, "err_msg": err_msg}
    parents = args["parent_categories"]
    if parents is None:
        args["parent_categories"] = []
    else:
        args["parent_categories"] = smart_split(parents, ",")
    mongo_client.updateCategory(site_id, args)
    return {"code": 0}
def _process(self, site_id, args):
    """Validate the request args and update a category record.

    On a validation error returns {"code": 1, "err_msg": ...}; otherwise
    normalizes "parent_categories" (None -> [], comma string -> list),
    writes the category, and returns {"code": 0}.
    """
    err_msg, args = self.ap.processArgs(args)
    if err_msg:
        return {"code": 1, "err_msg": err_msg}
    else:
        raw_parents = args["parent_categories"]
        args["parent_categories"] = [] if raw_parents is None else smart_split(raw_parents, ",")
        mongo_client.updateCategory(site_id, args)
        return {"code": 0}
def _process(self, site_id, args):
    """Validate the request args and update an item record.

    Optional fields that arrive as None are removed so the update does not
    clobber existing values; "categories" is normalized from a
    comma-separated string (or None) into a list. Returns {"code": 0} on
    success or {"code": 1, "err_msg": ...} on a validation error.
    """
    err_msg, args = self.ap.processArgs(args)
    if err_msg:
        return {"code": 1, "err_msg": err_msg}
    # Drop optional fields the caller did not supply (None means "absent").
    for optional_key in ("description", "image_link", "price",
                         "market_price", "item_group"):
        if args[optional_key] is None:
            del args[optional_key]
    categories = args["categories"]
    if categories is None:
        args["categories"] = []
    else:
        args["categories"] = smart_split(categories, ",")
    mongo_client.updateItem(site_id, args)
    return {"code": 0}
# --- Data-migration script section (Python 2) ---
# NOTE(review): this chunk begins mid-structure — `user_ids` and `site_id`
# are bound earlier in the file, so the first loop and its trailing print
# most likely sit inside an enclosing per-site loop; confirm the original
# nesting before re-indenting.

# Refresh the cached purchasing history for every user of the current site.
for user_id in user_ids.keys():
    mongo_client.updateUserPurchasingHistory(site_id, user_id)
print "updated for %s" % site_id
print "=======================\n"

print "Fix item categories"
# Normalize the "categories" field of every item in every site:
# a legacy comma-separated string becomes a list; a missing/None value
# becomes an empty list.
for site in connection["tjb-db"]["sites"].find():
    site_id = site["site_id"]
    print "Work on %s" % site_id
    c_items = getSiteDBCollection(connection, site_id, "items")
    for item_row in c_items.find():
        categories = item_row.get("categories", None)
        if isinstance(categories, basestring):
            # Legacy rows stored categories as "a,b,c" — split into a list.
            c_items.update({"item_id": item_row["item_id"]},
                           {"$set": {"categories": smart_split(categories, ",")}})
        elif categories is None:
            c_items.update({"item_id": item_row["item_id"]},
                           {"$set": {"categories": []}})
print "=======================\n"

print "Fix sites"
from Adminboard.site_utils import generateApiKey
sites = connection["tjb-db"]["sites"]
# Backfill an api_key for any site record created before API keys existed.
for site in sites.find():
    if not site.has_key("api_key"):
        api_key = generateApiKey(connection, site["site_id"], site["site_name"])
        sites.update({"site_id": site["site_id"]},
                     {"$set": {"api_key": api_key}})
    # NOTE(review): the handling of calc_interval continues past this chunk;
    # placement inside the loop is inferred from its use of `site`.
    calc_interval = site.get("calc_interval", None)
def calc_order_items_with_rec_info(site_id, connection, client):
    # Rebuild the Hive table `order_items_with_rec_info`: distinct PLO
    # (place-order) rows from backfilled_raw_logs, left-joined against
    # rec_buy and recommendation_logs so each purchased item carries the
    # recommendation-log row that drove it (if any); `is_rec_item` marks
    # recommended purchases. Finally the most recent month's recommended
    # purchases are exported into a `csv_YYYYMM` table.
    #
    # NOTE(review): `site_id` and `connection` are unused in the visible
    # body — confirm whether callers rely on this signature.

    # Legacy, disabled variant that built a plain `order_items` table
    # without recommendation info; kept for reference.
    # client.execute("DROP TABLE order_items")
    # client.execute("CREATE TABLE order_items ( "
    #                "date_str STRING, "
    #                "hour INT, "
    #                "uniq_order_id STRING, "
    #                "order_id STRING, "
    #                "user_id STRING, "
    #                "tjbid STRING, "
    #                "item_id STRING,"
    #                "price FLOAT, "
    #                "amount INT"
    #                ")"
    #                "ROW FORMAT DELIMITED "
    #                "FIELDS TERMINATED BY ',' "
    #                "STORED AS TEXTFILE")
    # client.execute("INSERT OVERWRITE TABLE order_items "
    #                " SELECT a.date_str, a.hour, a.uniq_order_id, a.order_id, a.filled_user_id as user_id, "
    #                " a.tjbid, a.item_id, a.price, a.amount "
    #                " FROM "
    #                " (SELECT DISTINCT brl.date_str, brl.hour, brl.uniq_order_id, brl.order_id, brl.filled_user_id, "
    #                " brl.tjbid, brl.item_id, brl.price, brl.amount "
    #                " FROM rec_buy rb1 "
    #                " RIGHT OUTER JOIN backfilled_raw_logs brl ON (rb1.uniq_order_id = brl.uniq_order_id AND rb1.item_id = brl.item_id) "
    #                " WHERE brl.behavior = 'PLO' "
    #                " ) a"
    #                )
    """
    TODO: There are duplicated orders in raw_logs, there are order_id with
    different uniq_order_id. The duplication will be kept in this hive table.
    But will be de-duplicated in the table for csv dump.
    """
    client.execute("DROP TABLE order_items_with_rec_info")
    # Schema: one row per order item plus the source recommendation-log
    # columns (src_*); is_rec_item is true when a matching rec_buy row exists.
    client.execute("CREATE TABLE order_items_with_rec_info ( "
                   "created_on DOUBLE, "
                   "date_str STRING, "
                   "hour INT, "
                   "uniq_order_id STRING, "
                   "order_id STRING, "
                   "user_id STRING, "
                   "tjbid STRING, "
                   "item_id STRING,"
                   "price FLOAT, "
                   "amount INT, "
                   "src_item_id STRING, "
                   "src_behavior STRING, "
                   "src_created_on STRING, "
                   "src_date_str STRING, "
                   "src_hour INT, "
                   "is_rec_item BOOLEAN "
                   ")"
                   "ROW FORMAT DELIMITED "
                   "FIELDS TERMINATED BY ',' "
                   "STORED AS TEXTFILE")
    # Inner subquery `oi`: distinct PLO order rows. The two LEFT OUTER JOINs
    # attach the recommendation that led to each purchase; `rl.behavior IS
    # NOT NULL` materializes the is_rec_item flag.
    client.execute("INSERT OVERWRITE TABLE order_items_with_rec_info "
                   " SELECT oi.created_on, oi.date_str, oi.hour, "
                   " oi.uniq_order_id, oi.order_id, oi.user_id, oi.tjbid, "
                   " oi.item_id, oi.price, oi.amount, "
                   " rl.item_id AS src_item_id, "
                   " rl.behavior AS src_behavior, "
                   " rl.created_on AS src_created_on, "
                   " rl.date_str AS src_date_str, "
                   " rl.hour AS src_hour, "
                   " rl.behavior IS NOT NULL "
                   " FROM "
                   " (SELECT DISTINCT brl.created_on, brl.date_str, brl.hour, brl.uniq_order_id, brl.order_id, brl.filled_user_id as user_id, "
                   " brl.tjbid, brl.item_id, brl.price, brl.amount "
                   " FROM rec_buy rb1 "
                   " RIGHT OUTER JOIN backfilled_raw_logs brl ON (rb1.uniq_order_id = brl.uniq_order_id AND rb1.item_id = brl.item_id) "
                   " WHERE brl.behavior = 'PLO' "
                   " ) oi"
                   " LEFT OUTER JOIN rec_buy rb "
                   " ON rb.uniq_order_id = oi.uniq_order_id and rb.item_id = oi.item_id "
                   " LEFT OUTER JOIN recommendation_logs rl "
                   " ON rl.req_id = rb.src_req_id"
                   )
    # Find the most recent date present so we know which month to export.
    client.execute("SELECT oi.date_str FROM order_items_with_rec_info oi SORT BY oi.date_str DESC limit 1")
    row = client.fetchOne()
    if (row == None or row == ''):
        pass  # table is empty — nothing to export
    else:
        data = result_as_dict(smart_split(row, "\t"), ["date_str"])
        date_str = data["date_str"]
        assert len(date_str) == 10  # expected format "YYYY-MM-DD"
        month = date_str[0:7]  # "YYYY-MM"
        month_ = (month.replace('-', ''))  # "YYYYMM" — usable in a table name
        # NOTE(review): DROP TABLE without IF EXISTS errors when
        # csv_<month_> does not exist yet — confirm this client tolerates it.
        client.execute("drop table csv_%s" % (month_))
        client.execute("create table csv_%s row format delimited fields terminated by ','"
                       " lines terminated by '\n' as "
                       "select distinct user_id, order_id, "
                       "item_id, price, amount, date_str, hour, src_item_id, src_behavior, src_date_str, src_hour "
                       "from order_items_with_rec_info where is_rec_item = true and date_str like '%%%s%%'" % (month_, month))
def yieldClientResults(client):
    """Generate result rows from *client*, one list of tab-split fields per
    row, stopping at the first None or empty-string row."""
    row = client.fetchOne()
    while row is not None and row != '':
        yield smart_split(row, "\t")
        row = client.fetchOne()