예제 #1
0
 def insert_att(self,lan,lav,an,res_av):
     """Record one attribute name/value pair when the regex produced a match.

     ``lan`` and ``lav`` are parallel lists that are appended to in place.
     Nothing is recorded when ``res_av`` is empty.  Otherwise the first
     match has every ``p_jkh`` match removed (presumably markup; the
     pattern is defined elsewhere) and is stripped of surrounding
     whitespace.

     For the attribute equal to ``self.keys_digikey[3]`` (the
     "Quantity Available" field according to the original note) commas
     are dropped from the value, and an empty value is stored as ``'0'``.
     """
     if not res_av:
         return

     cleaned = p_jkh.sub('', res_av[0]).strip()
     if an == self.keys_digikey[3]:
         # Quantity: drop thousands separators; empty text means no stock.
         cleaned = cleaned.replace(',', '') if cleaned else '0'

     lan.append(an)
     lav.append(cleaned)
예제 #2
0
    def get_one_cartprice(self,html):
        """Extract the single cart record from a cart page's HTML.

        The page is expected to hold exactly one record (per the original
        note).  Every ``(key, pattern)`` pair in ``list_keys_p`` is tried
        against ``html``; the first match has ``p_jkh`` matches removed and
        is stripped.  Keys whose pattern does not match map to ``''``.
        The value for ``'Mouser detail url'`` is prefixed with the Mouser
        domain.

        Returns a dict with one entry per key in ``list_keys_p``.
        """
        cart_row = {}
        for field, pattern in list_keys_p:
            matches = pattern.findall(html)
            if not matches:
                cart_row[field] = ''
                continue

            text = p_jkh.sub('', matches[0]).strip()
            if field == 'Mouser detail url':
                # The page carries a relative link; make it absolute.
                text = 'http://cn.mouser.com' + text
            cart_row[field] = text

        return cart_row
예제 #3
0
    def get_dict_info(self,hc):
        """Extract the key product fields from the HTML of a detail page.

        ``hc`` is the page source.  Each field is located with one of the
        compiled patterns stored on ``self`` (``self.p_*``) and stored in
        the result under the key ``keys_mouser_tt[i][1]``.

        Returns the populated dict.  Returns ``{}`` when the supplier part
        number is reported as 'Not Assigned' / '未分配', or when any
        exception is raised while parsing (the exception is only printed
        when ``debug`` is truthy).

        NOTE(review): ``change_res_shuxian`` is defined elsewhere; it
        presumably normalises the match lists - confirm before relying on
        the exact shape of its result.
        """
        dict_info = {}

        try:
            # First- and second-level category.
            """ 一 二级分类 """
            res_fsc = self.p_first_second_catetory.findall(hc)
            res_fsc = change_res_shuxian(res_fsc)
            # Supplier part number.
            """ 供应商型号 """
            res_supp_partno = self.p_supp_partno.findall(hc)
            res_supp_partno = change_res_shuxian(res_supp_partno)
            # Manufacturer part number.
            """ 生产商型号 """
            res_mfr_partno = self.p_mfr_partno.findall(hc)
            res_mfr_partno = change_res_shuxian(res_mfr_partno)
            # Manufacturer.
            """ 生产厂商 """
            res_manufacturer = self.p_manufacturer.findall(hc)
            res_manufacturer = change_res_shuxian(res_manufacturer)
            # Description.
            """ 描述 """
            res_description = self.p_description.findall(hc)
            res_description = change_res_shuxian(res_description)
            # Lifecycle.
            """ 寿命周期 """
            res_lifecycle = self.p_lifecycle.findall(hc)
            res_lifecycle = change_res_shuxian(res_lifecycle)
            # Shipping-restriction information.
            """ 限制信息 """
            res_shiprest = self.p_shiprest.findall(hc)
            res_shiprest = change_res_shuxian(res_shiprest)
            # Image URL.
            """ 图片url """
            res_image_url = self.p_image_url.findall(hc)
            # PDF URL.
            """ pdf url """
            res_pdf_url = self.p_pdf_url.findall(hc)
            # Packaging.
            """ 加上封装 """
            res_packaging = self.p_packaging.findall(hc)
            res_packaging = change_res_shuxian(res_packaging)
            if res_packaging:
                dict_info[keys_mouser_tt[14][1]] = res_packaging[0]

            # URL of the detail page itself.
            """ 加上详细信息页面url  """
            res_info_url = self.p_info_url.findall(hc)
            if res_info_url:
                dict_info[keys_mouser_tt[27][1]] = res_info_url[0]

            # Product kind / category.
            """ 加上种类 """
            res_pc = self.p_pc.findall(hc)
            res_pc = change_res_shuxian(res_pc)
            if res_pc:
                dict_info[keys_mouser_tt[21][1]] = p_jkh.sub('',res_pc[0]).strip()

            if res_fsc:
                # Stored as "<first> >> <second>".
                dict_info[keys_mouser_tt[22][1]] = res_fsc[0][0] + ' >> ' + res_fsc[0][1]
            if res_supp_partno and res_supp_partno[0] in ['Not Assigned','未分配']:
                # A part without an assigned supplier number is treated as invalid.
                if debug: print 'line 232 find mouser partno is invalid, so return {}'
                return {}
            if res_supp_partno:
                dict_info[keys_mouser_tt[18][1]] = res_supp_partno[0]
            if res_mfr_partno:
                dict_info[keys_mouser_tt[1][1]] = p_jkh.sub('', res_mfr_partno[0])
            if res_manufacturer:
                dict_info[keys_mouser_tt[2][1]] = res_manufacturer[0]
            if res_description:
                dict_info[keys_mouser_tt[3][1]] = p_jkh.sub('', res_description[0])
            if res_lifecycle:
                dict_info[keys_mouser_tt[4][1]] = p_jkh.sub('',res_lifecycle[0])
            if res_shiprest:
                # Shipping-restriction markup was found: fill the restriction keys.
                ''' 匹配到页面发货限制的信息时,设置键字shiprest'''
                shiprest = res_shiprest[0]
                res_shiprestinfo = self.p_shiprestinfo.findall(shiprest)   
                # res_shiprestinfo is a list of (src, alt) tuples.
                ''' res_shiprestinfo: [('src','alt'),()] '''
                if res_shiprestinfo:
                    # Each restriction is encoded as "src||alt"; several
                    # restrictions are joined with "|||".
                    ''' 每个限制信息格式: src||alt  可能有一或者多个限制信息 每种限制信息以|||隔开  '''
                    sr_info = [[self.url_mouser_qz + src.replace('../',''), alt] for src,alt in res_shiprestinfo]
                    ''' [src||alt,...] '''
                    sr_info = ['||'.join(one) for one in sr_info]
                    ''' src||alt|||src||alt ... '''
                    sr_info = '|||'.join(sr_info)
                    dict_info[keys_mouser_tt[13][1]] = sr_info
                else:
                    # No parsable entries: store a generic "restricted, check
                    # the Mouser site" message instead.
                    dict_info[keys_mouser_tt[13][1]] = unicode('该产品存在发货限制,请上mouser网站查询','utf-8')

                # Restriction type (EIP, CCC, BIG, ...), joined with "||".
                # Original note: edited by daimingming on 2013-03-01.
                ''' 设置禁运类型 EIP CCC BIG等 edit by daimingming on 2013年 03月 01日 星期五 14:38:21 CST '''
                res_shiprest_type   = self.p_shiprest_type.findall(shiprest)
                if res_shiprest_type:
                    dict_info[keys_mouser_tt[30][1]]    = '||'.join(res_shiprest_type)
                else:
                    dict_info[keys_mouser_tt[30][1]]    = unicode('该产品存在发货限制','utf-8')

            if res_image_url:
                dict_info[keys_mouser_tt[28][1]] = res_image_url[0].strip()
            if res_pdf_url:
                dict_info[keys_mouser_tt[23][1]] = res_pdf_url[0].strip()

            # Specification table.  RoHS is handled specially; every other
            # attribute is emitted as "name:value" and joined with "|||".
            """ 处理详细信息 这里特殊处理 RoHS  格式为: attname:attvalue|||... """
            res_special_info = self.p_special_info.findall(hc)
            if res_special_info:
                list_special_info = []
                for an,av in res_special_info:
                    if an == 'RoHS':
                        # RoHS compliance is detected by the icon file name.
                        if 'icon_rohs.gif' in av:
                            dict_info[keys_mouser_tt[24][1]] = 'Yes'
                        else:
                            dict_info[keys_mouser_tt[24][1]] = 'No'
                    else:
                        # ':' is the name/value separator, so drop it from values.
                        av = p_jkh.sub('',av).strip().replace(':','')
                        list_special_info.append('%s:%s' % (an,av))
                special_info = '|||'.join(list_special_info)
                dict_info[keys_mouser_tt[29][1]] = special_info
            
            # Stock / availability.
            """ 库存 """
            res_div_avai = self.p_div_availability.findall(hc)

            if res_div_avai:
                # The stock patterns are applied to the availability block only.
                div_avai = res_div_avai[0]
                res_stock = self.p_stock.findall(div_avai)
                res_stock = change_res_shuxian(res_stock)
                res_on_order = self.p_on_order.findall(div_avai)
                res_on_order = change_res_shuxian(res_on_order)
                res_flt = self.p_flt.findall(div_avai)
                res_flt = change_res_shuxian(res_flt)
                if res_stock:
                    # Clean-up for nicer display: row ends become newlines,
                    # commas and double spaces are removed.
                    ''' 为了显示的更好效果 '''
                    info = res_stock[0]
                    info = info.replace('</tr>','\n').replace(',','')
                    info = p_jkh.sub('',info)
                    info = p_nbsp.sub('',info)
                    info = info.replace('  ','')

                    # Drop a single trailing newline.
                    if info and info[-1] == '\n':
                        info = info[:-1]
                    else:
                        pass
                    dict_info[keys_mouser_tt[17][1]] = info

                    res_num = p_num.findall(info)# this regex does not come from the config file
                    if res_num:
                        dict_info[keys_mouser_tt[5][1]] = res_num[0]
                    else:
                        dict_info[keys_mouser_tt[5][1]] = '0'

                if res_on_order:
                    info = p_jkh.sub('',res_on_order[0])
                    info = p_nbsp.sub('',info)
                    info = info.replace('  ','')
                    res_num_start = p_num_start.findall(info)
                    if res_num_start:
                        dict_info[keys_mouser_tt[6][1]] = res_num_start[0]
                    else:
                        dict_info[keys_mouser_tt[6][1]] = '0'
                if res_flt:
                    info = p_jkh.sub('',res_flt[0])
                    info = p_nbsp.sub('',info)
                    info = info.replace('  ','')
                    dict_info[keys_mouser_tt[7][1]] = info
            else:
                pass

            # Pricing.
            """ 价格 """
            res_minimum = self.p_minimum.findall(hc)
            res_minimum = change_res_shuxian(res_minimum)#
            res_multiples = self.p_multiples.findall(hc)
            res_multiples = change_res_shuxian(res_multiples)#
            # NOTE(review): a found value is stored as int, the fallback as the
            # string '0' - callers see two different types for these keys.
            if res_minimum:
                dict_info[keys_mouser_tt[8][1]] = int(res_minimum[0].replace(' ',''))
            else:
                dict_info[keys_mouser_tt[8][1]] = '0'
            if res_multiples:
                dict_info[keys_mouser_tt[9][1]] = int(res_multiples[0].replace(' ',''))
            else:
                dict_info[keys_mouser_tt[9][1]] = '0'

            res_div_price = self.p_div_price.findall(hc)
            if res_div_price:
                # The price patterns are applied to the price block only.
                div_price = res_div_price[0]
                res_buy_quantity = self.p_buy_quantity.findall(div_price)
                res_buy_price = self.p_buy_price.findall(div_price)
                res_spe_xing = self.p_spe_xing.findall(div_price)
                res_spe_xing = change_res_shuxian(res_spe_xing)#
                res_spe_price = self.p_spe_price.findall(div_price)
                res_reel = self.p_reel.findall(div_price)
                res_more = self.p_more.findall(div_price)
                res_more = change_res_shuxian(res_more)#
                if res_buy_quantity and res_buy_price:
                    if len(res_buy_quantity) == len(res_buy_price):
                        # Quantity count matches price count: pair them up as
                        # "quantity:price" joined with "|||".
                        """ 数量个数 与 购买价格个数 一致  """
                        list_price_normal = []
                        i = 0
                        while i < len(res_buy_quantity):
                            bq = res_buy_quantity[i].replace(',','')#'number'
                            bp = res_buy_price[i]
                            if bq and bp:
                                # Skip rows that list a quantity but no price
                                # (the original note cites a Littelfuse 0202125H
                                # page as an example).
                                ''' 防止出现有购买数量 无购买价格的情况 如页面 http://cn.mouser.com/ProductDetail/Littelfuse/0202125H/?qs=sGAEpiMZZMseCiJT91fwIpCtAz8CGq9CFANOg93eDRM%3d'''
                                list_price_normal.append('%s:%s' % (bq,bp))
                            i += 1
                        price_info = '|||'.join(list_price_normal)
                        dict_info[keys_mouser_tt[10][1]] = price_info
                    else:
                        pass
                if res_buy_quantity and res_spe_price:
                    if len(res_buy_quantity) == len(res_spe_price) and res_spe_xing:
                        # Quantity count matches special-price count (and the
                        # special-price marker matched): same encoding as above.
                        """ 数量个数 与 特殊价格个数 一致  """
                        list_price_special = []
                        i = 0
                        while i < len(res_buy_quantity):
                            bq = res_buy_quantity[i].replace(',','')#'number'
                            bp = res_spe_price[i]
                            if bq and bp:
                                list_price_special.append('%s:%s' % (bq,bp))
                            i += 1
                        price_spe_info = '|||'.join(list_price_special)
                        dict_info[keys_mouser_tt[16][1]] = price_spe_info
                    else:
                        pass
                if res_reel:
                    reel = res_reel[0]
                    reel = p_jkh.sub('',reel)
                    reel = reel.replace('  ','')
                    dict_info[keys_mouser_tt[11][1]] = reel
                if res_more:
                    # Relative link: strip '../' and prepend the site prefix.
                    more = res_more[0]
                    more = self.url_mouser_qz + more.replace('../','')
                    dict_info[keys_mouser_tt[12][1]] = more
            else:
                pass
            return dict_info
        except Exception,e:
            # Best-effort parser: any failure discards everything collected so far.
            if debug:print 'line 353 exception :\n',e
            return {}
예제 #4
0
    def get_dict_info(self,hc,**dict_args):
        """Collect product fields from a result-row or detail-page HTML string.

        ``hc`` is passed through ``filter_html`` first.  The keyword
        argument ``is_info_page`` (default ``False``) selects which pattern
        set is used: the ``*_iip`` patterns for a detail page, the plain
        ones otherwise.

        Image URL, part number, manufacturer, description and stock are
        stored under ``keys_mouser_tt[i][1]`` for i = 28, 1, 2, 3, 5 when
        their pattern matches.  Price breaks are stored under index 10 as
        ``"qty:price"`` items joined with ``"|||"`` (e.g.
        ``'1:$82.08|||25:$61.56'`` per the original note).  In table mode,
        when the "see more" pattern matches, the detail page is fetched to
        obtain the full price list and its URL is stored under index 27.
        ``self.url_search`` is always stored under index 19.

        Returns the populated dict.
        """
        # Not used further in this method; kept so ``self.name`` is still read.
        fun = 'function get_dict_info of %s' % self.name
        dict_info = {}

        hc = filter_html(hc)
        # Whether ``hc`` is the source of a detail page.
        on_info_page = dict_args.get('is_info_page', False)

        if on_info_page:
            field_patterns = (p_imgurl_iip, p_partno_iip, p_mfr_iip,
                              p_desc_iip, p_stock_iip)
        else:
            field_patterns = (p_imgurl, p_partno, p_mfr, p_desc, p_stock)
        img_hits, partno_hits, mfr_hits, desc_hits, stock_hits = [
            pattern.findall(hc) for pattern in field_patterns
        ]

        if img_hits:
            img_url = p_jkh.sub('', img_hits[0]).replace('&quot;', '')
            dict_info[keys_mouser_tt[28][1]] = img_url.strip()

        text_fields = (
            (1, partno_hits),
            (2, mfr_hits),
            (3, desc_hits),
            (5, stock_hits),
        )
        for idx, hits in text_fields:
            if hits:
                dict_info[keys_mouser_tt[idx][1]] = p_jkh.sub('', hits[0]).strip()

        # Price breaks: a list of (quantity text, price) pairs.
        if on_info_page:
            tiers = p_priceinfo_iip.findall(hc)
        elif p_seemore.findall(hc):
            # Table mode and the row is truncated: the complete price list
            # is only available on the detail page.
            tiers = []
            detail_urls = p_uip.findall(hc)
            if detail_urls:
                dict_info[keys_mouser_tt[27][1]] = detail_urls[0]
                detail_html = filter_html(get_html_urllib(detail_urls[0], 2))
                if detail_html != 'timeout':
                    tiers = p_priceinfo_iip.findall(detail_html)
        else:
            # Table mode and the row already carries every price break.
            tiers = p_priceinfo.findall(hc)

        price_text = '|||'.join(
            ['%s:%s' % (p_invalidnum.sub('', qty).strip(), price)
             for qty, price in tiers]
        )
        if price_text:
            dict_info[keys_mouser_tt[10][1]] = price_text

        # Always record the part-number search page URL.
        dict_info[keys_mouser_tt[19][1]] = self.url_search

        return dict_info
예제 #5
0
    def get_dict_info(self,hc):
        """Collect product fields from a detail page's HTML.

        ``hc`` is passed through ``filter_html`` first.  Each field is
        stored under ``keys_mouser_tt[i][1]`` only when its pattern
        matches:

        * 2 manufacturer, 1 part number, 23 PDF link - first match as is
        * 3 description - ``p_jkh`` matches removed, then stripped
        * 14 packaging, 24 RoHS, 25 "your cost" - first match stripped
        * 10 price breaks - ``"qty:price"`` items joined with ``"|||"``
        * 5 total quantity (int) and 26 the raw rows - from the first
          matched stock table; every row is a 3-tuple whose last item is
          the quantity
        * 20 - first match of ``p_uip``

        ``self.url_search`` is always stored under index 19.

        Returns the populated dict.
        """
        # Not used further in this method; kept so ``self.name`` is still read.
        fun = 'function get_dict_info of %s' % self.name
        dict_info = {}

        hc = filter_html(hc)

        def as_is(text):
            return text

        def stripped(text):
            return text.strip()

        def untagged(text):
            return p_jkh.sub('', text).strip()

        simple_fields = (
            (2, p_mfr, as_is),
            (1, p_partno, as_is),
            (3, p_desc, untagged),
            (23, p_pdf, as_is),
            (14, p_pack, stripped),
            (24, p_rohs, stripped),
        )
        for idx, pattern, clean in simple_fields:
            hits = pattern.findall(hc)
            if hits:
                dict_info[keys_mouser_tt[idx][1]] = clean(hits[0])

        # Price breaks, e.g. [('1-24', '$82.08'), ('over 1000', '$61.56')]
        # becomes '1:$82.08|||1000:$61.56' (example from the original note).
        tiers = p_pc_pp.findall(hc)
        if tiers:
            dict_info[keys_mouser_tt[10][1]] = '|||'.join(
                ['%s:%s' % (p_invalidnum.sub('', qty), price)
                 for qty, price in tiers]
            )

        cost_hits = p_yourcost.findall(hc)
        if cost_hits:
            dict_info[keys_mouser_tt[25][1]] = cost_hits[0].strip()

        table_hits = p_table_lnq.findall(hc)
        if table_hits:
            rows = p_tr_lnq.findall(table_hits[0])
            # Total stock is the sum of the last column over all rows.
            dict_info[keys_mouser_tt[5][1]] = sum(int(q) for _, _, q in rows)
            dict_info[keys_mouser_tt[26][1]] = rows

        # Always record the part-number search page URL.
        dict_info[keys_mouser_tt[19][1]] = self.url_search

        page_urls = p_uip.findall(hc)
        if page_urls:
            dict_info[keys_mouser_tt[20][1]] = page_urls[0]

        return dict_info