def fill_catalog():

    tnow = timeHelper.getNowLong()

    catalog_id = 1000
    vlist = []
    tlist = []
    for cdict in JD_CATALOG_ARRAY:
        ttp = []
        catids = cdict['category_ids']
        catalog_name = cdict['catalog_name'].strip()
        ttp += [ catalog_id, catalog_name, catalog_id, tnow ]

        for item in catids:
            vtp = []
            cat_name = item.strip()
            id_prefix_array = _get_category_id_prefix_given_category_name(cat_name)
            vtp += id_prefix_array
            vtp += [catalog_id, catalog_name, tnow]
            vlist.append(vtp)

        tlist.append(ttp)
        catalog_id += 1000

    sql1 = 'insert into jd_catalog values(%s,%s,%s,%s)'
    tlist += _get_fixed_catalog()
    ar1 = dbhelper.executeSqlWriteMany(sql1, tlist)

    sql2 = 'insert into jd_catalog_map values(%s,%s,%s,%s,%s)'
    ar2 = dbhelper.executeSqlWriteMany(sql2, vlist)

    print 'jd_catalog rows inserted: %s' %ar1
    print 'jd_catalog_map rows inserted: %s' %ar2
Beispiel #2
0
def fill_catalog():

    tnow = timeHelper.getNowLong()

    catalog_id = 1000
    vlist = []
    tlist = []
    for cdict in JD_CATALOG_ARRAY:
        ttp = []
        catids = cdict['category_ids']
        catalog_name = cdict['catalog_name'].strip()
        ttp += [catalog_id, catalog_name, catalog_id, tnow]

        for item in catids:
            vtp = []
            cat_name = item.strip()
            id_prefix_array = _get_category_id_prefix_given_category_name(
                cat_name)
            vtp += id_prefix_array
            vtp += [catalog_id, catalog_name, tnow]
            vlist.append(vtp)

        tlist.append(ttp)
        catalog_id += 1000

    sql1 = 'insert into jd_catalog values(%s,%s,%s,%s)'
    tlist += _get_fixed_catalog()
    ar1 = dbhelper.executeSqlWriteMany(sql1, tlist)

    sql2 = 'insert into jd_catalog_map values(%s,%s,%s,%s,%s)'
    ar2 = dbhelper.executeSqlWriteMany(sql2, vlist)

    print 'jd_catalog rows inserted: %s' % ar1
    print 'jd_catalog_map rows inserted: %s' % ar2
def update_history_lowest_store():
    skulist = getHistoryLowest_SkuIds()
    dt = timeHelper.getNowLong()
    vlist = []
    for sku_id in skulist:
        vlist.append([sku_id, dt])
    sql = 'insert ignore into jd_notification_history_lowest values (%s,%s)'
    afr = dbhelper.executeSqlWriteMany(sql,vlist)
    sql2 = 'replace into jd_notification_job_status values("%s","%s")'%(NOTIFICATION_JOB_NAME,dt)
    afr2 = dbhelper.executeSqlWrite1(sql2)
    afr3 = _removeOldNotifications()
    afr4 = _removeOutdated_Nonhistory_lowest()
    return [afr,afr2,afr3,afr4]
def processItemPromo():
    vlist = []
    glist = []
    update_date = timeHelper.getNowLong()
    recent = timeHelper.getTimeAheadOfNowHours(datamining_config.PROMO_ITEM_RECENCY_HOURS,timeHelper.FORMAT_LONG)
    logging.debug('Reading jd_promo_item_latest...' )
    sql = '''
        select sku_id, dt, promo_json from jd_promo_item_latest
        where promo_json is not NULL and LENGTH(promo_json)>100
        and dt>="%s"
    ''' %recent
    retrows = dbhelper.executeSqlRead(sql,is_dirty=True)
    # total_rows = len(retrows)
    num_error = 0
    num17 = 0
    logging.debug('completed!')
    logging.debug("Total rows with promo_json: %s" %len(retrows))
    for row in retrows:
        sku_id = row['sku_id']
        dt = row['dt']
        obj = None
        try:
            obj = json.loads(row['promo_json'])
        except:
            num_error += 1
            continue
        rtags = obj['pickOneTag']
        for tag in rtags:
            pid = tag['pid']
            code = tag['code']
            # 不记录加价购
            if code == "17":
                num17 += 1
                continue
            name = tag['name']
            content = tag['content']
            adurl = tag['adurl'] if 'adurl' in tag else ""
            tp = [sku_id, dt, pid, code, name, content, adurl, update_date]
            vlist.append(tp)
        tags = obj['tags']
        for tag in tags:
            pid = tag['pid']
            code = tag['code']
            name = tag['name'] if 'name' in tag else ""
            if code == "10":
                # gift
                gifts = tag['gifts']
                for gift in gifts:
                    gift_name = "赠品"
                    try:
                        gift_name = gift['nm']
                        gift_num = gift['num'] if 'num' in gift else 1
                        gift_image = gift['mp'] if 'mp' in gift else ""
                        gift_sku_id = gift['sid'] if 'sid' in gift else ""
                        gift_gt = gift['gt'] if 'gt' in gift else ""
                        gift_gs = gift['gs'] if 'gs' in gift else ""
                        tp_gift = [sku_id,dt,pid,code, name, gift_name, gift_num, gift_image, gift_sku_id, gift_gt, gift_gs, update_date]
                        glist.append(tp_gift)
                    except Exception as e:
                        logging.debug("error in extracting gift info for sku_id = %s"%sku_id)
                        logging.debug("%s" %e)
            else:
                content = tag['content']
                adurl = tag['adurl'] if 'adurl' in tag else ""
                tp = [sku_id, dt, pid, code, name, content, adurl, update_date]
                vlist.append(tp)

    logging.error("IGNOR-ABLE: num of errors: %s (like json.loads error)" %num_error)
    logging.debug('num17: %s' %num17 )
    logging.debug('vlist len: %s' %len(vlist) )
    logging.debug('glist len: %s' %len(glist) )

    sql_cb_promo_item = '''
        CREATE TABLE jd_analytic_promo_item_latest (
          sku_id bigint(20) NOT NULL,
          dt datetime NOT NULL,
          pid varchar(255) NOT NULL,
          code varchar(255) NOT NULL,
          name varchar(255) NOT NULL,
          content varchar(255) NOT NULL,
          adurl varchar(255) DEFAULT NULL,
          update_date datetime NOT NULL,
          PRIMARY KEY (sku_id,pid)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8
    '''

    sql_cb_promo_gift = '''
        CREATE TABLE jd_analytic_promo_gift_latest (
          sku_id bigint(20) NOT NULL,
          dt datetime NOT NULL,
          pid varchar(255) NOT NULL,
          code varchar(255) NOT NULL,
          name varchar(255) NOT NULL,
          gift_name varchar(255) NOT NULL,
          gift_num int(11) NOT NULL,
          gift_image varchar(255) DEFAULT NULL,
          gift_sku_id bigint(20) NOT NULL,
          gift_gt varchar(255) DEFAULT NULL,
          gift_gs varchar(255) DEFAULT NULL,
          update_date datetime NOT NULL,
          PRIMARY KEY (sku_id,pid)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8
    '''

    # persist in DB
    ret1 = ret2 = None
    if len(vlist)>0:
        ret1 = crawler_helper.persist_db_history_and_lastest_empty_first(
            table_name='jd_analytic_promo_item',
            num_cols=len(vlist[0]),
            value_list=vlist,
            is_many=True,
            need_history=False,
            sql_create_table=sql_cb_promo_item,
        )
    if len(glist)>0:
        ret2 = crawler_helper.persist_db_history_and_lastest_empty_first(
            table_name='jd_analytic_promo_gift',
            num_cols=len(glist[0]),
            value_list=glist,
            is_many=True,
            need_history=False,
            sql_create_table=sql_cb_promo_gift,
        )

        # record gift
        sglist = []
        cur_time = timeHelper.getNowLong()
        for gg in glist:
            sglist.append([gg[0],cur_time])
        sql_gg = 'insert ignore into jd_analytic_sku_gift values(%s,%s)'
        afr = dbhelper.executeSqlWriteMany(sql_gg,sglist)
        ret3 = {
            'status': 0 if afr > 0 else -1,
            'msg': "",
        }

    return _generate_mixed_ret([ret1,ret2, ret3])
Beispiel #5
0
def processItemPromo():
    vlist = []
    glist = []
    update_date = timeHelper.getNowLong()
    recent = timeHelper.getTimeAheadOfNowHours(
        datamining_config.PROMO_ITEM_RECENCY_HOURS, timeHelper.FORMAT_LONG)
    logging.debug('Reading jd_promo_item_latest...')
    sql = '''
        select sku_id, dt, promo_json from jd_promo_item_latest
        where promo_json is not NULL and LENGTH(promo_json)>100
        and dt>="%s"
    ''' % recent
    retrows = dbhelper.executeSqlRead(sql, is_dirty=True)
    # total_rows = len(retrows)
    num_error = 0
    num17 = 0
    logging.debug('completed!')
    logging.debug("Total rows with promo_json: %s" % len(retrows))
    for row in retrows:
        sku_id = row['sku_id']
        dt = row['dt']
        obj = None
        try:
            obj = json.loads(row['promo_json'])
        except:
            num_error += 1
            continue
        rtags = obj['pickOneTag']
        for tag in rtags:
            pid = tag['pid']
            code = tag['code']
            # 不记录加价购
            if code == "17":
                num17 += 1
                continue
            name = tag['name']
            content = tag['content']
            adurl = tag['adurl'] if 'adurl' in tag else ""
            tp = [sku_id, dt, pid, code, name, content, adurl, update_date]
            vlist.append(tp)
        tags = obj['tags']
        for tag in tags:
            pid = tag['pid']
            code = tag['code']
            name = tag['name'] if 'name' in tag else ""
            if code == "10":
                # gift
                gifts = tag['gifts']
                for gift in gifts:
                    gift_name = "赠品"
                    try:
                        gift_name = gift['nm']
                        gift_num = gift['num'] if 'num' in gift else 1
                        gift_image = gift['mp'] if 'mp' in gift else ""
                        gift_sku_id = gift['sid'] if 'sid' in gift else ""
                        gift_gt = gift['gt'] if 'gt' in gift else ""
                        gift_gs = gift['gs'] if 'gs' in gift else ""
                        tp_gift = [
                            sku_id, dt, pid, code, name, gift_name, gift_num,
                            gift_image, gift_sku_id, gift_gt, gift_gs,
                            update_date
                        ]
                        glist.append(tp_gift)
                    except Exception as e:
                        logging.debug(
                            "error in extracting gift info for sku_id = %s" %
                            sku_id)
                        logging.debug("%s" % e)
            else:
                content = tag['content']
                adurl = tag['adurl'] if 'adurl' in tag else ""
                tp = [sku_id, dt, pid, code, name, content, adurl, update_date]
                vlist.append(tp)

    logging.error("IGNOR-ABLE: num of errors: %s (like json.loads error)" %
                  num_error)
    logging.debug('num17: %s' % num17)
    logging.debug('vlist len: %s' % len(vlist))
    logging.debug('glist len: %s' % len(glist))

    sql_cb_promo_item = '''
        CREATE TABLE jd_analytic_promo_item_latest (
          sku_id bigint(20) NOT NULL,
          dt datetime NOT NULL,
          pid varchar(255) NOT NULL,
          code varchar(255) NOT NULL,
          name varchar(255) NOT NULL,
          content varchar(255) NOT NULL,
          adurl varchar(255) DEFAULT NULL,
          update_date datetime NOT NULL,
          PRIMARY KEY (sku_id,pid)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8
    '''

    sql_cb_promo_gift = '''
        CREATE TABLE jd_analytic_promo_gift_latest (
          sku_id bigint(20) NOT NULL,
          dt datetime NOT NULL,
          pid varchar(255) NOT NULL,
          code varchar(255) NOT NULL,
          name varchar(255) NOT NULL,
          gift_name varchar(255) NOT NULL,
          gift_num int(11) NOT NULL,
          gift_image varchar(255) DEFAULT NULL,
          gift_sku_id bigint(20) NOT NULL,
          gift_gt varchar(255) DEFAULT NULL,
          gift_gs varchar(255) DEFAULT NULL,
          update_date datetime NOT NULL,
          PRIMARY KEY (sku_id,pid)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8
    '''

    # persist in DB
    ret1 = ret2 = None
    if len(vlist) > 0:
        ret1 = crawler_helper.persist_db_history_and_lastest_empty_first(
            table_name='jd_analytic_promo_item',
            num_cols=len(vlist[0]),
            value_list=vlist,
            is_many=True,
            need_history=False,
            sql_create_table=sql_cb_promo_item,
        )
    if len(glist) > 0:
        ret2 = crawler_helper.persist_db_history_and_lastest_empty_first(
            table_name='jd_analytic_promo_gift',
            num_cols=len(glist[0]),
            value_list=glist,
            is_many=True,
            need_history=False,
            sql_create_table=sql_cb_promo_gift,
        )

        # record gift
        sglist = []
        cur_time = timeHelper.getNowLong()
        for gg in glist:
            sglist.append([gg[0], cur_time])
        sql_gg = 'insert ignore into jd_analytic_sku_gift values(%s,%s)'
        afr = dbhelper.executeSqlWriteMany(sql_gg, sglist)
        ret3 = {
            'status': 0 if afr > 0 else -1,
            'msg': "",
        }

    return _generate_mixed_ret([ret1, ret2, ret3])