def fill_catalog(): tnow = timeHelper.getNowLong() catalog_id = 1000 vlist = [] tlist = [] for cdict in JD_CATALOG_ARRAY: ttp = [] catids = cdict['category_ids'] catalog_name = cdict['catalog_name'].strip() ttp += [ catalog_id, catalog_name, catalog_id, tnow ] for item in catids: vtp = [] cat_name = item.strip() id_prefix_array = _get_category_id_prefix_given_category_name(cat_name) vtp += id_prefix_array vtp += [catalog_id, catalog_name, tnow] vlist.append(vtp) tlist.append(ttp) catalog_id += 1000 sql1 = 'insert into jd_catalog values(%s,%s,%s,%s)' tlist += _get_fixed_catalog() ar1 = dbhelper.executeSqlWriteMany(sql1, tlist) sql2 = 'insert into jd_catalog_map values(%s,%s,%s,%s,%s)' ar2 = dbhelper.executeSqlWriteMany(sql2, vlist) print 'jd_catalog rows inserted: %s' %ar1 print 'jd_catalog_map rows inserted: %s' %ar2
def fill_catalog(): tnow = timeHelper.getNowLong() catalog_id = 1000 vlist = [] tlist = [] for cdict in JD_CATALOG_ARRAY: ttp = [] catids = cdict['category_ids'] catalog_name = cdict['catalog_name'].strip() ttp += [catalog_id, catalog_name, catalog_id, tnow] for item in catids: vtp = [] cat_name = item.strip() id_prefix_array = _get_category_id_prefix_given_category_name( cat_name) vtp += id_prefix_array vtp += [catalog_id, catalog_name, tnow] vlist.append(vtp) tlist.append(ttp) catalog_id += 1000 sql1 = 'insert into jd_catalog values(%s,%s,%s,%s)' tlist += _get_fixed_catalog() ar1 = dbhelper.executeSqlWriteMany(sql1, tlist) sql2 = 'insert into jd_catalog_map values(%s,%s,%s,%s,%s)' ar2 = dbhelper.executeSqlWriteMany(sql2, vlist) print 'jd_catalog rows inserted: %s' % ar1 print 'jd_catalog_map rows inserted: %s' % ar2
def update_history_lowest_store(): skulist = getHistoryLowest_SkuIds() dt = timeHelper.getNowLong() vlist = [] for sku_id in skulist: vlist.append([sku_id, dt]) sql = 'insert ignore into jd_notification_history_lowest values (%s,%s)' afr = dbhelper.executeSqlWriteMany(sql,vlist) sql2 = 'replace into jd_notification_job_status values("%s","%s")'%(NOTIFICATION_JOB_NAME,dt) afr2 = dbhelper.executeSqlWrite1(sql2) afr3 = _removeOldNotifications() afr4 = _removeOutdated_Nonhistory_lowest() return [afr,afr2,afr3,afr4]
def processItemPromo(): vlist = [] glist = [] update_date = timeHelper.getNowLong() recent = timeHelper.getTimeAheadOfNowHours(datamining_config.PROMO_ITEM_RECENCY_HOURS,timeHelper.FORMAT_LONG) logging.debug('Reading jd_promo_item_latest...' ) sql = ''' select sku_id, dt, promo_json from jd_promo_item_latest where promo_json is not NULL and LENGTH(promo_json)>100 and dt>="%s" ''' %recent retrows = dbhelper.executeSqlRead(sql,is_dirty=True) # total_rows = len(retrows) num_error = 0 num17 = 0 logging.debug('completed!') logging.debug("Total rows with promo_json: %s" %len(retrows)) for row in retrows: sku_id = row['sku_id'] dt = row['dt'] obj = None try: obj = json.loads(row['promo_json']) except: num_error += 1 continue rtags = obj['pickOneTag'] for tag in rtags: pid = tag['pid'] code = tag['code'] # 不记录加价购 if code == "17": num17 += 1 continue name = tag['name'] content = tag['content'] adurl = tag['adurl'] if 'adurl' in tag else "" tp = [sku_id, dt, pid, code, name, content, adurl, update_date] vlist.append(tp) tags = obj['tags'] for tag in tags: pid = tag['pid'] code = tag['code'] name = tag['name'] if 'name' in tag else "" if code == "10": # gift gifts = tag['gifts'] for gift in gifts: gift_name = "赠品" try: gift_name = gift['nm'] gift_num = gift['num'] if 'num' in gift else 1 gift_image = gift['mp'] if 'mp' in gift else "" gift_sku_id = gift['sid'] if 'sid' in gift else "" gift_gt = gift['gt'] if 'gt' in gift else "" gift_gs = gift['gs'] if 'gs' in gift else "" tp_gift = [sku_id,dt,pid,code, name, gift_name, gift_num, gift_image, gift_sku_id, gift_gt, gift_gs, update_date] glist.append(tp_gift) except Exception as e: logging.debug("error in extracting gift info for sku_id = %s"%sku_id) logging.debug("%s" %e) else: content = tag['content'] adurl = tag['adurl'] if 'adurl' in tag else "" tp = [sku_id, dt, pid, code, name, content, adurl, update_date] vlist.append(tp) logging.error("IGNOR-ABLE: num of errors: %s (like json.loads error)" %num_error) logging.debug('num17: %s' %num17 ) logging.debug('vlist len: %s' %len(vlist) ) logging.debug('glist len: %s' %len(glist) ) sql_cb_promo_item = ''' CREATE TABLE jd_analytic_promo_item_latest ( sku_id bigint(20) NOT NULL, dt datetime NOT NULL, pid varchar(255) NOT NULL, code varchar(255) NOT NULL, name varchar(255) NOT NULL, content varchar(255) NOT NULL, adurl varchar(255) DEFAULT NULL, update_date datetime NOT NULL, PRIMARY KEY (sku_id,pid) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 ''' sql_cb_promo_gift = ''' CREATE TABLE jd_analytic_promo_gift_latest ( sku_id bigint(20) NOT NULL, dt datetime NOT NULL, pid varchar(255) NOT NULL, code varchar(255) NOT NULL, name varchar(255) NOT NULL, gift_name varchar(255) NOT NULL, gift_num int(11) NOT NULL, gift_image varchar(255) DEFAULT NULL, gift_sku_id bigint(20) NOT NULL, gift_gt varchar(255) DEFAULT NULL, gift_gs varchar(255) DEFAULT NULL, update_date datetime NOT NULL, PRIMARY KEY (sku_id,pid) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 ''' # persist in DB ret1 = ret2 = None if len(vlist)>0: ret1 = crawler_helper.persist_db_history_and_lastest_empty_first( table_name='jd_analytic_promo_item', num_cols=len(vlist[0]), value_list=vlist, is_many=True, need_history=False, sql_create_table=sql_cb_promo_item, ) if len(glist)>0: ret2 = crawler_helper.persist_db_history_and_lastest_empty_first( table_name='jd_analytic_promo_gift', num_cols=len(glist[0]), value_list=glist, is_many=True, need_history=False, sql_create_table=sql_cb_promo_gift, ) # record gift sglist = [] cur_time = timeHelper.getNowLong() for gg in glist: sglist.append([gg[0],cur_time]) sql_gg = 'insert ignore into jd_analytic_sku_gift values(%s,%s)' afr = dbhelper.executeSqlWriteMany(sql_gg,sglist) ret3 = { 'status': 0 if afr > 0 else -1, 'msg': "", } return _generate_mixed_ret([ret1,ret2, ret3])
def processItemPromo(): vlist = [] glist = [] update_date = timeHelper.getNowLong() recent = timeHelper.getTimeAheadOfNowHours( datamining_config.PROMO_ITEM_RECENCY_HOURS, timeHelper.FORMAT_LONG) logging.debug('Reading jd_promo_item_latest...') sql = ''' select sku_id, dt, promo_json from jd_promo_item_latest where promo_json is not NULL and LENGTH(promo_json)>100 and dt>="%s" ''' % recent retrows = dbhelper.executeSqlRead(sql, is_dirty=True) # total_rows = len(retrows) num_error = 0 num17 = 0 logging.debug('completed!') logging.debug("Total rows with promo_json: %s" % len(retrows)) for row in retrows: sku_id = row['sku_id'] dt = row['dt'] obj = None try: obj = json.loads(row['promo_json']) except: num_error += 1 continue rtags = obj['pickOneTag'] for tag in rtags: pid = tag['pid'] code = tag['code'] # 不记录加价购 if code == "17": num17 += 1 continue name = tag['name'] content = tag['content'] adurl = tag['adurl'] if 'adurl' in tag else "" tp = [sku_id, dt, pid, code, name, content, adurl, update_date] vlist.append(tp) tags = obj['tags'] for tag in tags: pid = tag['pid'] code = tag['code'] name = tag['name'] if 'name' in tag else "" if code == "10": # gift gifts = tag['gifts'] for gift in gifts: gift_name = "赠品" try: gift_name = gift['nm'] gift_num = gift['num'] if 'num' in gift else 1 gift_image = gift['mp'] if 'mp' in gift else "" gift_sku_id = gift['sid'] if 'sid' in gift else "" gift_gt = gift['gt'] if 'gt' in gift else "" gift_gs = gift['gs'] if 'gs' in gift else "" tp_gift = [ sku_id, dt, pid, code, name, gift_name, gift_num, gift_image, gift_sku_id, gift_gt, gift_gs, update_date ] glist.append(tp_gift) except Exception as e: logging.debug( "error in extracting gift info for sku_id = %s" % sku_id) logging.debug("%s" % e) else: content = tag['content'] adurl = tag['adurl'] if 'adurl' in tag else "" tp = [sku_id, dt, pid, code, name, content, adurl, update_date] vlist.append(tp) logging.error("IGNOR-ABLE: num of errors: %s (like json.loads error)" % num_error) logging.debug('num17: %s' % num17) logging.debug('vlist len: %s' % len(vlist)) logging.debug('glist len: %s' % len(glist)) sql_cb_promo_item = ''' CREATE TABLE jd_analytic_promo_item_latest ( sku_id bigint(20) NOT NULL, dt datetime NOT NULL, pid varchar(255) NOT NULL, code varchar(255) NOT NULL, name varchar(255) NOT NULL, content varchar(255) NOT NULL, adurl varchar(255) DEFAULT NULL, update_date datetime NOT NULL, PRIMARY KEY (sku_id,pid) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 ''' sql_cb_promo_gift = ''' CREATE TABLE jd_analytic_promo_gift_latest ( sku_id bigint(20) NOT NULL, dt datetime NOT NULL, pid varchar(255) NOT NULL, code varchar(255) NOT NULL, name varchar(255) NOT NULL, gift_name varchar(255) NOT NULL, gift_num int(11) NOT NULL, gift_image varchar(255) DEFAULT NULL, gift_sku_id bigint(20) NOT NULL, gift_gt varchar(255) DEFAULT NULL, gift_gs varchar(255) DEFAULT NULL, update_date datetime NOT NULL, PRIMARY KEY (sku_id,pid) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 ''' # persist in DB ret1 = ret2 = None if len(vlist) > 0: ret1 = crawler_helper.persist_db_history_and_lastest_empty_first( table_name='jd_analytic_promo_item', num_cols=len(vlist[0]), value_list=vlist, is_many=True, need_history=False, sql_create_table=sql_cb_promo_item, ) if len(glist) > 0: ret2 = crawler_helper.persist_db_history_and_lastest_empty_first( table_name='jd_analytic_promo_gift', num_cols=len(glist[0]), value_list=glist, is_many=True, need_history=False, sql_create_table=sql_cb_promo_gift, ) # record gift sglist = [] cur_time = timeHelper.getNowLong() for gg in glist: sglist.append([gg[0], cur_time]) sql_gg = 'insert ignore into jd_analytic_sku_gift values(%s,%s)' afr = dbhelper.executeSqlWriteMany(sql_gg, sglist) ret3 = { 'status': 0 if afr > 0 else -1, 'msg': "", } return _generate_mixed_ret([ret1, ret2, ret3])