Example #1
0
def load_click_items(numid2volumeprice):
    """Load click-log rows for the configured date range.

    Queries ``click_item_log`` (left-joined with ``item`` and ``shop``) for
    rows whose ``click_time`` lies between ``FLAGS.start`` and ``FLAGS.end``.
    When ``FLAGS.limit`` is positive it is appended as a ``limit`` clause.

    Args:
        numid2volumeprice: mapping that is updated in place.  Every row that
            has a ``num_id`` stores ``{'volume': ..., 'price': ...}`` under
            the key ``long(num_id)``; a later row with the same ``num_id``
            overwrites the earlier entry.

    Returns:
        A ``(click_items, paid_items)`` tuple: the ``ClickItemType`` rows
        that have a ``num_id``, and the ``taobao_report_id`` of each of
        those rows that carries one.
    """
    logger.info("Loading click items")
    click_item_type = namedtuple(
        "ClickItemType",
        'click_hash item_id click_time click_ip area_code click_price click_volume item_price item_volume shop_nick taobao_report_id num_id'
    )
    click_items, paid_items = [], []

    engine = get_db_engine()
    start_str = datestr(FLAGS.start)
    end_str = datestr(FLAGS.end)
    where = "click_time>='%s' and click_time<='%s'" % (start_str, end_str)
    if FLAGS.limit > 0:
        where += " limit %s" % FLAGS.limit
    sql = "select outer_code,item_id,click_time,click_ip,click_area,click_price,click_volume,item.price,item.volume,shop.nick,click_item_log.taobao_report_id,item.num_id from click_item_log left join item on click_item_log.item_id=item.id left join shop on shop.id=item.shop_id where %s" % where
    logger.debug("fetching %s", sql)
    result_set = engine.execute(sql)

    logger.info("Processing click items %s", result_set.rowcount)
    item_matched = 0
    price_diffs = 0
    for row in result_set:
        record = click_item_type(*row)
        if not record.num_id:
            # Without a num_id there is no key to store volume/price under.
            logger.warn("no numid %s", record)
            continue

        click_items.append(record)
        if record.item_id > 0:
            item_matched += 1

        # Volume: a missing/zero value becomes 0.2, anything above 800 is capped.
        volume = record.item_volume
        if not volume:
            logger.warn("item %s abnormal %s", record.item_id, volume)
            volume = 0.2
        elif volume > 800:
            volume = 800

        # Price: start from the click price; if it exceeds the item price by
        # more than 50%, fall back to the item price.
        price = record.click_price
        list_price = record.item_price
        if list_price and price > list_price * 1.5:
            price = list_price
            price_diffs += 1
            logger.warn("Price diff paid? %s %s/%s too much %s - %s",
                        record.taobao_report_id, price_diffs,
                        result_set.rowcount, record.click_price, list_price)
        # Cap at 500.0, then replace a missing or sub-0.5 price with 1.0.
        if price > 500.0:
            price = 500.0
        if not price or price < 0.5:
            logger.warn("price %s abnormal %s", record.item_id, price)
            price = 1.0

        numid2volumeprice[long(record.num_id)] = {
            'volume': volume,
            'price': price,
        }
        if record.taobao_report_id:
            paid_items.append(record.taobao_report_id)

    logger.info("Total click %s item matched %s", len(click_items),
                item_matched)
    return click_items, paid_items
 def process(self):
     """Link click-log rows to their matching taobao report rows.

     Selects the ``click_item_log`` rows whose ``click_time`` lies between
     ``FLAGS.start`` and ``FLAGS.end``.  For each one it looks up
     ``taobao_report`` rows whose ``outer_code`` equals ``'jn'`` followed by
     the click row's ``outer_code``.  When at least one report matches, the
     id of the first match is written to the click row's
     ``taobao_report_id`` column.  Returns nothing.
     """
     date_filter = "click_time >= '%s' and click_time <= '%s'" % (datestr(FLAGS.start), datestr(FLAGS.end))
     click_sql = "select id, outer_code, item_id from click_item_log where %s" % date_filter
     logger.debug("Executing %s", click_sql)
     click_rows = list(self.guangdb.execute(click_sql))
     logger.debug("processing %s", len(click_rows))
     for click_row in click_rows:
         log_id = click_row[0]
         report_code = 'jn%s' % click_row[1]
         # NOTE(review): the statements below are built by string
         # interpolation from database values -- confirm outer_code cannot
         # contain quotes before relying on this.
         pay_sql = "select id, num_iid, pay_time, trade_id, item_title, seller_nick, shop_title from taobao_report where outer_code='%s'" % report_code
         reports = list(self.guangdb.execute(pay_sql))
         if not reports:
             # No report for this click: nothing to update.
             continue
         # Only the first matching report is used.
         report_id = reports[0][0]
         logger.debug("Matched logid %s reportid %s", log_id, report_id)
         update_sql = "update click_item_log set taobao_report_id=%s where id=%s" % (report_id, log_id)
         self.guangdb.execute(update_sql)
def load_click_items(numid2volumeprice):
    """Fetch click-log rows for the configured window and derive volume/price.

    Runs one query over ``click_item_log`` left-joined with ``item`` and
    ``shop``, restricted to ``click_time`` between ``FLAGS.start`` and
    ``FLAGS.end`` (plus a ``limit`` clause when ``FLAGS.limit`` is positive).

    Args:
        numid2volumeprice: mapping mutated in place.  Each row with a
            ``num_id`` writes ``{'volume': ..., 'price': ...}`` at key
            ``long(num_id)``; repeated ``num_id`` values overwrite.

    Returns:
        ``(click_items, paid_items)``: the ``ClickItemType`` rows that have
        a ``num_id``, and the ``taobao_report_id`` values found on them.
    """
    logger.info("Loading click items")
    click_item_type = namedtuple("ClickItemType", 'click_hash item_id click_time click_ip area_code click_price click_volume item_price item_volume shop_nick taobao_report_id num_id')
    click_items, paid_items = [], []

    engine = get_db_engine()
    where = "click_time>='%s' and click_time<='%s'" % (datestr(FLAGS.start), datestr(FLAGS.end))
    if FLAGS.limit > 0:
        where += " limit %s" % FLAGS.limit
    sql = "select outer_code,item_id,click_time,click_ip,click_area,click_price,click_volume,item.price,item.volume,shop.nick,click_item_log.taobao_report_id,item.num_id from click_item_log left join item on click_item_log.item_id=item.id left join shop on shop.id=item.shop_id where %s" % where
    logger.debug("fetching %s", sql)
    result_set = engine.execute(sql)

    logger.info("Processing click items %s", result_set.rowcount)
    item_matched = 0
    price_diffs = 0
    for row in result_set:
        entry = click_item_type(*row)
        if not entry.num_id:
            # No num_id means no key for the volume/price mapping; skip the row.
            logger.warn("no numid %s", entry)
            continue

        click_items.append(entry)
        if entry.item_id > 0:
            item_matched += 1

        # Missing/zero volume is replaced by 0.2; values above 800 are capped.
        volume = entry.item_volume
        if not volume:
            logger.warn("item %s abnormal %s", entry.item_id, volume)
            volume = 0.2
        elif volume > 800:
            volume = 800

        # A click price more than 1.5x the item price is replaced by the
        # item price.
        price = entry.click_price
        item_price = entry.item_price
        if item_price and price > item_price * 1.5:
            price = item_price
            price_diffs += 1
            logger.warn("Price diff paid? %s %s/%s too much %s - %s", entry.taobao_report_id, price_diffs, result_set.rowcount, entry.click_price, item_price)
        # Upper bound 500.0; a missing or sub-0.5 price falls back to 1.0.
        if price > 500.0:
            price = 500.0
        if not price or price < 0.5:
            logger.warn("price %s abnormal %s", entry.item_id, price)
            price = 1.0

        numid2volumeprice[long(entry.num_id)] = {'volume': volume, 'price': price}
        if entry.taobao_report_id:
            paid_items.append(entry.taobao_report_id)

    logger.info("Total click %s item matched %s", len(click_items), item_matched)
    return click_items, paid_items
Example #4
0
def clicklog_main():
    """Turn filtered click-log files into match records.

    For every day yielded by ``eachday(FLAGS.start, FLAGS.end)`` the matching
    files under ``/space/log/filtered/click*/`` are read line by line.  Each
    line is parsed with ``get_click``; lines that yield nothing are skipped.
    The parsed click is converted with ``get_record``.  When
    ``FLAGS.commit`` is set each record is written through ``insert_match``;
    otherwise it is collected in the returned list.

    The collected list is always dumped as JSON to ``FLAGS.out_file``.  In
    commit mode nothing is collected, so an empty list is written.

    Returns:
        The list of collected records (empty in commit mode).
    """
    click_file_list = []
    for d in eachday(FLAGS.start, FLAGS.end):
        click_file_list.extend(glob("/space/log/filtered/click*/click-" + datestr(d) + "_00???"))
    # TODO: load from conversion db?
    ret = []
    # Bind db on every path so it is never an undefined name.
    db = get_db_engine() if FLAGS.commit else None
    for fn in click_file_list:
        logger.debug("processing %s", fn)
        # Close each log file as soon as it has been consumed instead of
        # leaving every handle to the garbage collector.
        with open(fn, "r") as click_file:
            for line in click_file:
                click = get_click(line)
                if not click:
                    continue
                rec = get_record(click)
                # TODO: skip records that are already written in the db (a
                # disabled check here compared rec[0] against a `written`
                # collection).
                if not rec:
                    continue
                if FLAGS.commit:
                    insert_match(db, rec)
                else:
                    ret.append(rec)
    # The context manager guarantees the JSON is flushed and the file closed.
    with open(FLAGS.out_file, "w") as out_file:
        simplejson.dump(ret, out_file)
    return ret
Example #5
0
 def process(self):
     """Attach taobao report ids to click-log rows in the configured window.

     Reads the ``click_item_log`` rows with ``click_time`` between
     ``FLAGS.start`` and ``FLAGS.end``.  For each row it queries
     ``taobao_report`` for an ``outer_code`` of ``'jn'`` plus the click
     row's ``outer_code``.  If any report is found, the first one's id is
     stored in the click row's ``taobao_report_id``.  Returns nothing.
     """
     start_str = datestr(FLAGS.start)
     end_str = datestr(FLAGS.end)
     where = "click_time >= '%s' and click_time <= '%s'" % (start_str, end_str)
     click_sql = "select id, outer_code, item_id from click_item_log where %s" % where
     logger.debug("Executing %s", click_sql)
     click_rows = list(self.guangdb.execute(click_sql))
     logger.debug("processing %s", len(click_rows))
     for click_row in click_rows:
         click_id = click_row[0]
         report_code = 'jn%s' % click_row[1]
         # NOTE(review): SQL is assembled by string interpolation from
         # stored values -- confirm outer_code is safe to embed.
         pay_sql = "select id, num_iid, pay_time, trade_id, item_title, seller_nick, shop_title from taobao_report where outer_code='%s'" % report_code
         matches = list(self.guangdb.execute(pay_sql))
         if not matches:
             # No report for this click; leave the row untouched.
             continue
         # Only the first matching report is linked.
         report_id = matches[0][0]
         logger.debug("Matched logid %s reportid %s", click_id, report_id)
         self.guangdb.execute(
             "update click_item_log set taobao_report_id=%s where id=%s"
             % (report_id, click_id))