def insert_att(self, lan, lav, an, res_av):
    """Append one attribute name/value pair taken from a regex result.

    Nothing is recorded when ``res_av`` is empty (the pattern found no
    match).  Otherwise the first match is passed through
    ``p_jkh.sub('', ...)``, stripped, and stored: ``an`` is appended to
    ``lan`` and the cleaned value to ``lav``.

    For the attribute named by ``self.keys_digikey[3]`` (labelled
    "Quantity Available" in the original comments) commas are removed from
    the value, and an empty value is recorded as ``'0'`` (no stock).

    Both lists are modified in place; the method returns ``None``.
    """
    if not res_av:
        return

    # p_jkh is a module-level pattern; presumably it strips markup from the
    # matched fragment -- confirm against its definition.
    value = p_jkh.sub('', res_av[0]).strip()

    if an == self.keys_digikey[3]:
        # Quantity field: drop thousands separators, default to zero stock.
        value = value.replace(',', '') if value else '0'

    lan.append(an)
    lav.append(value)
def get_one_cartprice(self, html):
    """Build a field dict from a cart page that holds exactly one record.

    The original comments state that the cart page was reached for one
    precise part number and therefore contains a single entry.

    Every ``(key, pattern)`` pair in the module-level ``list_keys_p`` is
    applied to ``html``.  The first match is passed through
    ``p_jkh.sub('', ...)`` and stripped; a key whose pattern does not match
    maps to the empty string.  The value stored under
    ``'Mouser detail url'`` is prefixed with the Mouser domain.

    Returns the resulting dict.
    """
    cart_entry = {}
    for field, pattern in list_keys_p:
        matches = pattern.findall(html)
        if not matches:
            cart_entry[field] = ''
            continue

        text = p_jkh.sub('', matches[0]).strip()
        if field == 'Mouser detail url':
            # The page value gets the Mouser domain prepended.
            text = 'http://cn.mouser.com' + text
        cart_entry[field] = text
    return cart_entry
def get_dict_info(self,hc): """ 详细信息页面代码中获取关键信息 """ dict_info = {} try: """ 一 二级分类 """ res_fsc = self.p_first_second_catetory.findall(hc) res_fsc = change_res_shuxian(res_fsc) """ 供应商型号 """ res_supp_partno = self.p_supp_partno.findall(hc) res_supp_partno = change_res_shuxian(res_supp_partno) """ 生产商型号 """ res_mfr_partno = self.p_mfr_partno.findall(hc) res_mfr_partno = change_res_shuxian(res_mfr_partno) """ 生产厂商 """ res_manufacturer = self.p_manufacturer.findall(hc) res_manufacturer = change_res_shuxian(res_manufacturer) """ 描述 """ res_description = self.p_description.findall(hc) res_description = change_res_shuxian(res_description) """ 寿命周期 """ res_lifecycle = self.p_lifecycle.findall(hc) res_lifecycle = change_res_shuxian(res_lifecycle) """ 限制信息 """ res_shiprest = self.p_shiprest.findall(hc) res_shiprest = change_res_shuxian(res_shiprest) """ 图片url """ res_image_url = self.p_image_url.findall(hc) """ pdf url """ res_pdf_url = self.p_pdf_url.findall(hc) """ 加上封装 """ res_packaging = self.p_packaging.findall(hc) res_packaging = change_res_shuxian(res_packaging) if res_packaging: dict_info[keys_mouser_tt[14][1]] = res_packaging[0] """ 加上详细信息页面url """ res_info_url = self.p_info_url.findall(hc) if res_info_url: dict_info[keys_mouser_tt[27][1]] = res_info_url[0] """ 加上种类 """ res_pc = self.p_pc.findall(hc) res_pc = change_res_shuxian(res_pc) if res_pc: dict_info[keys_mouser_tt[21][1]] = p_jkh.sub('',res_pc[0]).strip() if res_fsc: dict_info[keys_mouser_tt[22][1]] = res_fsc[0][0] + ' >> ' + res_fsc[0][1] if res_supp_partno and res_supp_partno[0] in ['Not Assigned','未分配']: if debug: print 'line 232 find mouser partno is invalid, so return {}' return {} if res_supp_partno: dict_info[keys_mouser_tt[18][1]] = res_supp_partno[0] if res_mfr_partno: dict_info[keys_mouser_tt[1][1]] = p_jkh.sub('', res_mfr_partno[0]) if res_manufacturer: dict_info[keys_mouser_tt[2][1]] = res_manufacturer[0] if res_description: dict_info[keys_mouser_tt[3][1]] = p_jkh.sub('', res_description[0]) if 
res_lifecycle: dict_info[keys_mouser_tt[4][1]] = p_jkh.sub('',res_lifecycle[0]) if res_shiprest: ''' 匹配到页面发货限制的信息时,设置键字shiprest''' shiprest = res_shiprest[0] res_shiprestinfo = self.p_shiprestinfo.findall(shiprest) ''' res_shiprestinfo: [('src','alt'),()] ''' if res_shiprestinfo: ''' 每个限制信息格式: src||alt 可能有一或者多个限制信息 每种限制信息以|||隔开 ''' sr_info = [[self.url_mouser_qz + src.replace('../',''), alt] for src,alt in res_shiprestinfo] ''' [src||alt,...] ''' sr_info = ['||'.join(one) for one in sr_info] ''' src||alt|||src||alt ... ''' sr_info = '|||'.join(sr_info) dict_info[keys_mouser_tt[13][1]] = sr_info else: dict_info[keys_mouser_tt[13][1]] = unicode('该产品存在发货限制,请上mouser网站查询','utf-8') ''' 设置禁运类型 EIP CCC BIG等 edit by daimingming on 2013年 03月 01日 星期五 14:38:21 CST ''' res_shiprest_type = self.p_shiprest_type.findall(shiprest) if res_shiprest_type: dict_info[keys_mouser_tt[30][1]] = '||'.join(res_shiprest_type) else: dict_info[keys_mouser_tt[30][1]] = unicode('该产品存在发货限制','utf-8') if res_image_url: dict_info[keys_mouser_tt[28][1]] = res_image_url[0].strip() if res_pdf_url: dict_info[keys_mouser_tt[23][1]] = res_pdf_url[0].strip() """ 处理详细信息 这里特殊处理 RoHS 格式为: attname:attvalue|||... 
""" res_special_info = self.p_special_info.findall(hc) if res_special_info: list_special_info = [] for an,av in res_special_info: if an == 'RoHS': if 'icon_rohs.gif' in av: dict_info[keys_mouser_tt[24][1]] = 'Yes' else: dict_info[keys_mouser_tt[24][1]] = 'No' else: av = p_jkh.sub('',av).strip().replace(':','') list_special_info.append('%s:%s' % (an,av)) special_info = '|||'.join(list_special_info) dict_info[keys_mouser_tt[29][1]] = special_info """ 库存 """ res_div_avai = self.p_div_availability.findall(hc) if res_div_avai: div_avai = res_div_avai[0] res_stock = self.p_stock.findall(div_avai) res_stock = change_res_shuxian(res_stock) res_on_order = self.p_on_order.findall(div_avai) res_on_order = change_res_shuxian(res_on_order) res_flt = self.p_flt.findall(div_avai) res_flt = change_res_shuxian(res_flt) if res_stock: ''' 为了显示的更好效果 ''' info = res_stock[0] info = info.replace('</tr>','\n').replace(',','') info = p_jkh.sub('',info) info = p_nbsp.sub('',info) info = info.replace(' ','') if info and info[-1] == '\n': info = info[:-1] else: pass dict_info[keys_mouser_tt[17][1]] = info res_num = p_num.findall(info)#该正则表达式不来自配置文件 if res_num: dict_info[keys_mouser_tt[5][1]] = res_num[0] else: dict_info[keys_mouser_tt[5][1]] = '0' if res_on_order: info = p_jkh.sub('',res_on_order[0]) info = p_nbsp.sub('',info) info = info.replace(' ','') res_num_start = p_num_start.findall(info) if res_num_start: dict_info[keys_mouser_tt[6][1]] = res_num_start[0] else: dict_info[keys_mouser_tt[6][1]] = '0' if res_flt: info = p_jkh.sub('',res_flt[0]) info = p_nbsp.sub('',info) info = info.replace(' ','') dict_info[keys_mouser_tt[7][1]] = info else: pass """ 价格 """ res_minimum = self.p_minimum.findall(hc) res_minimum = change_res_shuxian(res_minimum)# res_multiples = self.p_multiples.findall(hc) res_multiples = change_res_shuxian(res_multiples)# if res_minimum: dict_info[keys_mouser_tt[8][1]] = int(res_minimum[0].replace(' ','')) else: dict_info[keys_mouser_tt[8][1]] = '0' if res_multiples: 
dict_info[keys_mouser_tt[9][1]] = int(res_multiples[0].replace(' ','')) else: dict_info[keys_mouser_tt[9][1]] = '0' res_div_price = self.p_div_price.findall(hc) if res_div_price: div_price = res_div_price[0] res_buy_quantity = self.p_buy_quantity.findall(div_price) res_buy_price = self.p_buy_price.findall(div_price) res_spe_xing = self.p_spe_xing.findall(div_price) res_spe_xing = change_res_shuxian(res_spe_xing)# res_spe_price = self.p_spe_price.findall(div_price) res_reel = self.p_reel.findall(div_price) res_more = self.p_more.findall(div_price) res_more = change_res_shuxian(res_more)# if res_buy_quantity and res_buy_price: if len(res_buy_quantity) == len(res_buy_price): """ 数量个数 与 购买价格个数 一致 """ list_price_normal = [] i = 0 while i < len(res_buy_quantity): bq = res_buy_quantity[i].replace(',','')#'number' bp = res_buy_price[i] if bq and bp: ''' 防止出现有购买数量 无购买价格的情况 如页面 http://cn.mouser.com/ProductDetail/Littelfuse/0202125H/?qs=sGAEpiMZZMseCiJT91fwIpCtAz8CGq9CFANOg93eDRM%3d''' list_price_normal.append('%s:%s' % (bq,bp)) i += 1 price_info = '|||'.join(list_price_normal) dict_info[keys_mouser_tt[10][1]] = price_info else: pass if res_buy_quantity and res_spe_price: if len(res_buy_quantity) == len(res_spe_price) and res_spe_xing: """ 数量个数 与 特殊价格个数 一致 """ list_price_special = [] i = 0 while i < len(res_buy_quantity): bq = res_buy_quantity[i].replace(',','')#'number' bp = res_spe_price[i] if bq and bp: list_price_special.append('%s:%s' % (bq,bp)) i += 1 price_spe_info = '|||'.join(list_price_special) dict_info[keys_mouser_tt[16][1]] = price_spe_info else: pass if res_reel: reel = res_reel[0] reel = p_jkh.sub('',reel) reel = reel.replace(' ','') dict_info[keys_mouser_tt[11][1]] = reel if res_more: more = res_more[0] more = self.url_mouser_qz + more.replace('../','') dict_info[keys_mouser_tt[12][1]] = more else: pass return dict_info except Exception,e: if debug:print 'line 353 exception :\n',e return {}
def get_dict_info(self, hc, **dict_args):
    """Extract product fields from page HTML into a dict.

    ``hc`` is first passed through ``filter_html``.  The keyword argument
    ``is_info_page`` (default ``False``) selects which set of module-level
    patterns is used: the ``*_iip`` patterns when ``hc`` is a detail page,
    the plain ones when ``hc`` is a row of a result table.

    In table mode, when the ``p_seemore`` pattern matches, the detail page
    is downloaded with ``get_html_urllib`` to read the full price list; if
    that download yields ``'timeout'`` no prices are recorded.

    Result keys are ``keys_mouser_tt[i][1]``.  Price tiers are stored as
    ``"qty:price"`` entries joined by ``"|||"``.  The search url
    (``self.url_search``) is always included.
    """
    # Not used below; kept because it reads self.name exactly as before.
    fun = 'function get_dict_info of %s' % self.name
    dict_info = {}
    hc = filter_html(hc)

    # Is hc the source of a detail page?
    iip = dict_args.get('is_info_page', False)
    if iip:
        patterns = (p_imgurl_iip, p_partno_iip, p_mfr_iip, p_desc_iip, p_stock_iip)
    else:
        patterns = (p_imgurl, p_partno, p_mfr, p_desc, p_stock)
    res_imgurl, res_partno, res_mfr, res_desc, res_stock = [
        pattern.findall(hc) for pattern in patterns]

    if res_imgurl:
        dict_info[keys_mouser_tt[28][1]] = (
            p_jkh.sub('', res_imgurl[0]).replace('"', '').strip())
    # The remaining simple fields share the same clean-up.
    for index, found in ((1, res_partno), (2, res_mfr),
                         (3, res_desc), (5, res_stock)):
        if found:
            dict_info[keys_mouser_tt[index][1]] = p_jkh.sub('', found[0]).strip()

    # Collect (quantity range, price) pairs.
    if iip:
        # Detail page: prices are in hc itself.
        res_priceinfo = p_priceinfo_iip.findall(hc)
    elif p_seemore.findall(hc):
        # Table row whose price list is truncated: the complete list has
        # to be read from the detail page.
        res_priceinfo = []  # default when the page cannot be obtained
        res_uip = p_uip.findall(hc)
        if res_uip:
            dict_info[keys_mouser_tt[27][1]] = res_uip[0]
            hc_more = filter_html(get_html_urllib(res_uip[0], 2))
            if hc_more != 'timeout':
                res_priceinfo = p_priceinfo_iip.findall(hc_more)
    else:
        # Table row that already shows every price.
        res_priceinfo = p_priceinfo.findall(hc)

    # Per the original comment:
    # [('1-24','$82.08'), ('25 +','$61.56')] -> '1:$82.08|||25:$61.56'
    pc_pp_finall = '|||'.join(
        ['%s:%s' % (p_invalidnum.sub('', pc).strip(), pp)
         for pc, pp in res_priceinfo])
    if pc_pp_finall:
        dict_info[keys_mouser_tt[10][1]] = pc_pp_finall

    # Always record the part-number search url.
    dict_info[keys_mouser_tt[19][1]] = self.url_search
    return dict_info
def get_dict_info(self, hc):
    """Extract product fields from a detail page's HTML into a dict.

    ``hc`` is first passed through ``filter_html``; module-level patterns
    are then applied to it.  Result keys are ``keys_mouser_tt[i][1]``.

    Price tiers are stored as ``"qty:price"`` entries joined by ``"|||"``.
    When the ``p_table_lnq`` table is present, its rows are stored as
    matched and the integer sum of their third column is stored as the
    quantity.  The search url (``self.url_search``) is always included.
    """
    # Not used below; kept because it reads self.name exactly as before.
    fun = 'function get_dict_info of %s' % self.name
    dict_info = {}
    hc = filter_html(hc)

    found = p_mfr.findall(hc)
    if found:
        dict_info[keys_mouser_tt[2][1]] = found[0]

    found = p_partno.findall(hc)
    if found:
        dict_info[keys_mouser_tt[1][1]] = found[0]

    found = p_desc.findall(hc)
    if found:
        dict_info[keys_mouser_tt[3][1]] = p_jkh.sub('', found[0]).strip()

    found = p_pdf.findall(hc)
    if found:
        dict_info[keys_mouser_tt[23][1]] = found[0]

    found = p_pack.findall(hc)
    if found:
        dict_info[keys_mouser_tt[14][1]] = found[0].strip()

    found = p_rohs.findall(hc)
    if found:
        dict_info[keys_mouser_tt[24][1]] = found[0].strip()

    price_pairs = p_pc_pp.findall(hc)
    if price_pairs:
        # Price breaks.  Per the original comment:
        # [('1-24','$82.08'), ('over 1000','$61.56')]
        #   -> ['1:$82.08', '1000:$61.56'] -> '1:$82.08|||1000:$61.56'
        dict_info[keys_mouser_tt[10][1]] = '|||'.join(
            ['%s:%s' % (p_invalidnum.sub('', pc), pp) for pc, pp in price_pairs])

    found = p_yourcost.findall(hc)
    if found:
        dict_info[keys_mouser_tt[25][1]] = found[0].strip()

    tables = p_table_lnq.findall(hc)
    if tables:
        rows = p_tr_lnq.findall(tables[0])
        # Each row is a 3-tuple; the third item is summed as the quantity.
        dict_info[keys_mouser_tt[5][1]] = sum(int(q) for _l, _n, q in rows)
        dict_info[keys_mouser_tt[26][1]] = rows

    # Always record the part-number search url, plus the url matched by
    # p_uip when there is one.
    dict_info[keys_mouser_tt[19][1]] = self.url_search
    found = p_uip.findall(hc)
    if found:
        dict_info[keys_mouser_tt[20][1]] = found[0]
    return dict_info