def load_click_items(numid2volumeprice):
    """Load click log rows for the FLAGS date range and collect price/volume data.

    Rows come from ``click_item_log`` left-joined with ``item`` and ``shop``,
    restricted to ``FLAGS.start``..``FLAGS.end`` and optionally capped by
    ``FLAGS.limit``.  Rows without an ``item.num_id`` are logged and skipped.

    Args:
        numid2volumeprice: dict updated in place; maps ``long(num_id)`` to
            ``{'volume': ..., 'price': ...}`` holding the clamped values of the
            most recently processed row for that num_id.

    Returns:
        A ``(click_items, paid_items)`` tuple: the list of ``ClickItemType``
        rows that had a num_id, and the list of their non-empty
        ``taobao_report_id`` values.
    """
    logger.info("Loading click items")
    click_items = []
    paid_items = []
    click_item_type = namedtuple(
        "ClickItemType",
        'click_hash item_id click_time click_ip area_code click_price '
        'click_volume item_price item_volume shop_nick taobao_report_id num_id')
    db = get_db_engine()

    # NOTE(review): the query is assembled with string formatting from FLAGS
    # values; confirm these never carry untrusted input.
    where = "click_time>='%s' and click_time<='%s'" % (
        datestr(FLAGS.start), datestr(FLAGS.end))
    if FLAGS.limit > 0:
        where += " limit %s" % FLAGS.limit
    sql = "select outer_code,item_id,click_time,click_ip,click_area,click_price,click_volume,item.price,item.volume,shop.nick,click_item_log.taobao_report_id,item.num_id from click_item_log left join item on click_item_log.item_id=item.id left join shop on shop.id=item.shop_id where %s" % where
    logger.debug("fetching %s", sql)
    results = db.execute(sql)

    item_matched = 0
    price_diffs = 0
    logger.info("Processing click items %s", results.rowcount)
    for line in results:
        click_item = click_item_type(*line)
        if not click_item.num_id:
            logger.warning("no numid %s", click_item)
            continue
        click_items.append(click_item)

        # Explicit truthiness guard so a NULL item_id is never compared to 0.
        if click_item.item_id and click_item.item_id > 0:
            item_matched += 1

        # Volume: missing/zero falls back to 0.2, anything above 800 is capped.
        volume = click_item.item_volume
        if not volume:
            logger.warning("item %s abnormal %s", click_item.item_id, volume)
            volume = 0.2
        elif volume > 800:
            volume = 800

        # Price: a click price more than 1.5x the item price is replaced by
        # the item price, then the result is capped at 500 and floored to 1.0
        # when missing or below 0.5.  The None guards keep a NULL click_price
        # out of numeric comparisons; it ends up at the 1.0 fallback.
        price = click_item.click_price
        item_price = click_item.item_price
        if item_price and price is not None and price > item_price * 1.5:
            price = item_price
            price_diffs += 1
            logger.warning("Price diff paid? %s %s/%s too much %s - %s",
                           click_item.taobao_report_id, price_diffs,
                           results.rowcount, click_item.click_price,
                           click_item.item_price)
        if price is not None and price > 500.0:
            price = 500.0
        if not price or price < 0.5:
            logger.warning("price %s abnormal %s", click_item.item_id, price)
            price = 1.0

        numid2volumeprice[long(click_item.num_id)] = {
            'volume': volume,
            'price': price,
        }
        if click_item.taobao_report_id:
            paid_items.append(click_item.taobao_report_id)

    logger.info("Total click %s item matched %s", len(click_items), item_matched)
    return click_items, paid_items
def process(self):
    """Link click log rows to taobao report rows that share an outer code.

    Selects the ``click_item_log`` rows whose ``click_time`` lies between
    ``FLAGS.start`` and ``FLAGS.end``.  For each one, looks up
    ``taobao_report`` by ``'jn' + outer_code`` and, when at least one report
    row exists, writes the first report's id into the click row's
    ``taobao_report_id`` column.
    """
    time_filter = "click_time >= '%s' and click_time <= '%s'" % (
        datestr(FLAGS.start), datestr(FLAGS.end))
    click_sql = "select id, outer_code, item_id from click_item_log where %s" % time_filter
    logger.debug("Executing %s", click_sql)
    click_rows = list(self.guangdb.execute(click_sql))
    logger.debug("processing %s", len(click_rows))

    for click_row in click_rows:
        report_code = 'jn%s' % click_row[1]
        pay_sql = "select id, num_iid, pay_time, trade_id, item_title, seller_nick, shop_title from taobao_report where outer_code='%s'" % report_code
        report_rows = list(self.guangdb.execute(pay_sql))
        if not report_rows:
            # No report row for this click: nothing to link.
            continue

        # Only the first matching report row is used.
        first_report = report_rows[0]
        logger.debug("Matched logid %s reportid %s", click_row[0], first_report[0])
        update_sql = "update click_item_log set taobao_report_id=%s where id=%s" % (
            first_report[0], click_row[0])
        self.guangdb.execute(update_sql)
def load_click_items(numid2volumeprice):
    """Load click log rows for the FLAGS date range and collect price/volume data.

    Rows come from ``click_item_log`` left-joined with ``item`` and ``shop``,
    restricted to ``FLAGS.start``..``FLAGS.end`` and optionally capped by
    ``FLAGS.limit``.  Rows without an ``item.num_id`` are logged and skipped.

    Args:
        numid2volumeprice: dict updated in place; maps ``long(num_id)`` to
            ``{'volume': ..., 'price': ...}`` holding the clamped values of the
            most recently processed row for that num_id.

    Returns:
        A ``(click_items, paid_items)`` tuple: the list of ``ClickItemType``
        rows that had a num_id, and the list of their non-empty
        ``taobao_report_id`` values.
    """
    logger.info("Loading click items")
    click_items = []
    paid_items = []
    click_item_type = namedtuple(
        "ClickItemType",
        'click_hash item_id click_time click_ip area_code click_price '
        'click_volume item_price item_volume shop_nick taobao_report_id num_id')
    db = get_db_engine()

    # NOTE(review): the query is assembled with string formatting from FLAGS
    # values; confirm these never carry untrusted input.
    where = "click_time>='%s' and click_time<='%s'" % (
        datestr(FLAGS.start), datestr(FLAGS.end))
    if FLAGS.limit > 0:
        where += " limit %s" % FLAGS.limit
    sql = "select outer_code,item_id,click_time,click_ip,click_area,click_price,click_volume,item.price,item.volume,shop.nick,click_item_log.taobao_report_id,item.num_id from click_item_log left join item on click_item_log.item_id=item.id left join shop on shop.id=item.shop_id where %s" % where
    logger.debug("fetching %s", sql)
    results = db.execute(sql)

    item_matched = 0
    price_diffs = 0
    logger.info("Processing click items %s", results.rowcount)
    for line in results:
        click_item = click_item_type(*line)
        if not click_item.num_id:
            logger.warning("no numid %s", click_item)
            continue
        click_items.append(click_item)

        # Explicit truthiness guard so a NULL item_id is never compared to 0.
        if click_item.item_id and click_item.item_id > 0:
            item_matched += 1

        # Volume: missing/zero falls back to 0.2, anything above 800 is capped.
        volume = click_item.item_volume
        if not volume:
            logger.warning("item %s abnormal %s", click_item.item_id, volume)
            volume = 0.2
        elif volume > 800:
            volume = 800

        # Price: a click price more than 1.5x the item price is replaced by
        # the item price, then the result is capped at 500 and floored to 1.0
        # when missing or below 0.5.  The None guards keep a NULL click_price
        # out of numeric comparisons; it ends up at the 1.0 fallback.
        price = click_item.click_price
        item_price = click_item.item_price
        if item_price and price is not None and price > item_price * 1.5:
            price = item_price
            price_diffs += 1
            logger.warning("Price diff paid? %s %s/%s too much %s - %s",
                           click_item.taobao_report_id, price_diffs,
                           results.rowcount, click_item.click_price,
                           click_item.item_price)
        if price is not None and price > 500.0:
            price = 500.0
        if not price or price < 0.5:
            logger.warning("price %s abnormal %s", click_item.item_id, price)
            price = 1.0

        numid2volumeprice[long(click_item.num_id)] = {
            'volume': volume,
            'price': price,
        }
        if click_item.taobao_report_id:
            paid_items.append(click_item.taobao_report_id)

    logger.info("Total click %s item matched %s", len(click_items), item_matched)
    return click_items, paid_items
def clicklog_main():
    """Scan filtered click log files and store or dump the derived records.

    For every day yielded by ``eachday(FLAGS.start, FLAGS.end)`` the matching
    ``click-<date>_00???`` files are globbed.  Each line is parsed with
    ``get_click`` and turned into a record with ``get_record``.  When
    ``FLAGS.commit`` is set, records are written with ``insert_match``;
    otherwise they are accumulated in the returned list.

    The accumulated list is always dumped as JSON to ``FLAGS.out_file``; it is
    empty when committing.

    Returns:
        The list of records collected when not committing.
    """
    click_file_list = []
    for d in eachday(FLAGS.start, FLAGS.end):
        click_file_list.extend(
            glob("/space/log/filtered/click*/click-" + datestr(d) + "_00???"))

    # TODO: load from conversion db?
    ret = []
    # The engine is only needed (and only created) in commit mode.
    db = get_db_engine() if FLAGS.commit else None

    for fn in click_file_list:
        logger.debug("processing %s", fn)
        # Context manager so each log file is closed before the next opens.
        with open(fn, "r") as click_file:
            for line in click_file:
                click = get_click(line)
                if not click:
                    continue
                rec = get_record(click)
                if not rec:
                    continue
                if FLAGS.commit:
                    insert_match(db, rec)
                else:
                    ret.append(rec)

    # Close the output file deterministically so the JSON is flushed to disk.
    with open(FLAGS.out_file, "w") as out_file:
        simplejson.dump(ret, out_file)
    return ret
def process(self):
    """Fill in ``taobao_report_id`` on click log rows that have a report.

    Reads ``click_item_log`` rows with ``click_time`` between ``FLAGS.start``
    and ``FLAGS.end``, queries ``taobao_report`` for the outer code prefixed
    with ``jn``, and updates the click row with the id of the first report
    row returned, if any.
    """
    date_range = (datestr(FLAGS.start), datestr(FLAGS.end))
    where_clause = "click_time >= '%s' and click_time <= '%s'" % date_range
    click_sql = "select id, outer_code, item_id from click_item_log where %s" % where_clause
    logger.debug("Executing %s", click_sql)
    candidates = list(self.guangdb.execute(click_sql))
    logger.debug("processing %s", len(candidates))

    for candidate in candidates:
        prefixed_code = 'jn%s' % candidate[1]
        pay_sql = "select id, num_iid, pay_time, trade_id, item_title, seller_nick, shop_title from taobao_report where outer_code='%s'" % prefixed_code
        reports = list(self.guangdb.execute(pay_sql))
        if reports:
            # A report exists for this click; link it via the first row's id.
            logger.debug("Matched logid %s reportid %s", candidate[0], reports[0][0])
            link_sql = "update click_item_log set taobao_report_id=%s where id=%s" % (
                reports[0][0], candidate[0])
            self.guangdb.execute(link_sql)