コード例 #1
0
ファイル: petpapa.py プロジェクト: hiadone/python_crawl
	def set_product_data(self , page_url, soup, product_ctx ) :
		"""Collect the data of one listed product and hand it on for registration.

		A fresh ProductData is filled step by step: category, image and
		sold-out flag are delegated to helper methods on ``self``; the name
		and URL come from ``set_product_name_url_fourth``; prices are parsed
		from the ``<li>`` rows found under ``product_ctx``.

		Args:
			page_url: Listing page URL, forwarded to the category helper.
			soup: Parsed listing page, forwarded to the category helper.
			product_ctx: Element wrapping one product. It must support
				``find_all`` (presumably a BeautifulSoup tag - confirm with caller).

		Returns:
			Always True. Exceptions are logged through ``__LOG__`` and
			swallowed, so the return value does not signal success.
		"""
		try :
			product_data = ProductData()

			# Product category.
			self.set_product_category_second(page_url, product_data, soup)

			# Product image.
			self.set_product_image_second( product_data, product_ctx )

			# Sold-out status.
			self.set_product_soldout_first(product_data, product_ctx )

			# Product name / URL: try ('p', 'name') first, then ('strong', 'name').
			crw_post_url = self.set_product_name_url_fourth( product_data, product_ctx , 'p', 'name')
			if crw_post_url == '' :
				crw_post_url = self.set_product_name_url_fourth( product_data, product_ctx , 'strong', 'name')

			# Prices. Each row is expected to read "<title> : <price> (<note>)", e.g.
			#   <li><strong class="title displaynone"><span></span> :</strong><span>₩15,000</span></li>
			# The first row is stored as the regular price, the second as the sale price.
			for li_num, li_ctx in enumerate( product_ctx.find_all('li'), 1 ) :
				split_list = li_ctx.get_text().strip().split(':')
				# A row without ':' has no value part. Indexing [1] used to raise
				# IndexError here, which aborted the whole product; skip the row instead.
				if len(split_list) < 2 : continue

				price_str = split_list[1].split('(')[0].strip()
				if li_num == 1 :
					product_data.crw_price = int( __UTIL__.get_only_digit( price_str ) )
				elif li_num == 2 :
					product_data.crw_price_sale = int( __UTIL__.get_only_digit( price_str ) )

			# Only products for which a URL was found are passed on.
			if crw_post_url != '' :
				self.set_product_url_hash( product_data, crw_post_url)

		except Exception as ex:
			__LOG__.Error('에러 : set_product_data')
			__LOG__.Error(ex)

		return True
コード例 #2
0
ファイル: amor_ange.py プロジェクト: hiadone/python_crawl
	def set_product_data(self , page_url, soup, product_ctx ) :
		"""Build a ProductData for one listed product and pass it on.

		Category, image, sold-out flag and name/URL are delegated to helper
		methods on ``self``; the sale price is read from the
		``<div rel="판매가">`` rows under ``product_ctx``.

		Args:
			page_url: Listing page URL (not used by this implementation).
			soup: Parsed listing page, forwarded to the category helper.
			product_ctx: Element wrapping one product; must support
				``find_all`` (presumably a BeautifulSoup tag).

		Returns:
			Always True; exceptions are logged via ``__LOG__`` and swallowed.
		"""
		try :
			product_data = ProductData()

			# Product category.
			self.set_product_category_first(product_data, soup)

			# Product image.
			self.set_product_image_second( product_data, product_ctx )

			# Sold-out status.
			self.set_product_soldout_first(product_data, product_ctx )

			# Product name / URL.
			crw_post_url = self.set_product_name_url_second( product_data, product_ctx , 'div', '-name')

			# Sale price. Sample row:
			#   <div rel="판매가"><span class="title displaynone"><span>판매가</span> :</span>
			#   <span>39,000원</span><span id="span_product_tax_type_text"> </span></div>
			# Label spans and blank spans are skipped; any remaining span text is
			# parsed as the price, so the last matching span wins.
			for price_div_ctx in product_ctx.find_all('div', {'rel':'판매가'}) :
				for span_ctx in price_div_ctx.find_all('span') :
					value_str = span_ctx.get_text().strip()
					if value_str == '' or '판매가' in value_str or ':' in value_str :
						continue
					product_data.crw_price_sale = int( __UTIL__.get_only_digit( value_str ))

			# Only products for which a URL was found are passed on.
			if crw_post_url != '' :
				self.set_product_url_hash( product_data, crw_post_url)

		except Exception as ex:
			__LOG__.Error('에러 : set_product_data')
			__LOG__.Error(ex)

		return True
コード例 #3
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listed product and pass it on.

        Category, image and name/URL are delegated to helper methods on
        ``self``. The sold-out flag and the sale price are read directly
        from ``product_ctx``.

        Args:
            page_url: Listing page URL, forwarded to the category helper.
            soup: Parsed listing page, forwarded to the category helper.
            product_ctx: Element wrapping one product; must support
                ``find`` (presumably a BeautifulSoup tag).

        Returns:
            Always True; exceptions are logged via ``__LOG__`` and swallowed.
        """
        try:
            product_data = ProductData()

            # Product category.
            self.set_product_category_second(page_url, product_data, soup)

            # Product image.
            self.set_product_image_second(product_data, product_ctx)

            # Sold-out status: a product without the "add to cart" icon
            # (<img class="ec-admin-icon cart" ...>) is treated as sold out.
            cart_img_ctx = product_ctx.find('img', class_='ec-admin-icon cart')
            if cart_img_ctx is None:
                product_data.crw_is_soldout = 1

            # Product name / URL: try the 'p' tag first, then 'strong'.
            crw_post_url = ''
            for tag_name in ('p', 'strong'):
                crw_post_url = self.set_product_name_url_fourth(
                    product_data, product_ctx, tag_name, 'name')
                if crw_post_url != '':
                    break

            # Sale price: <p class="prices"> ... <span class="price normal">.
            prices_ctx = product_ctx.find('p', class_='prices')
            amount_ctx = None
            if prices_ctx is not None:
                amount_ctx = prices_ctx.find('span', class_='price normal')
            if amount_ctx is not None:
                amount_text = amount_ctx.get_text().strip()
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(amount_text))

            # Only products for which a URL was found are passed on.
            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #4
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listed product and pass it on.

        Category, image, sold-out flag and name/URL are delegated to helper
        methods on ``self``; the sale price is taken from the first
        ``<span class="price normal">`` under ``product_ctx``.

        Args:
            page_url: Listing page URL (not used by this implementation).
            soup: Parsed listing page, forwarded to the category helper.
            product_ctx: Element wrapping one product; must support
                ``find`` (presumably a BeautifulSoup tag).

        Returns:
            Always True; exceptions are logged via ``__LOG__`` and swallowed.
        """
        try:
            product_data = ProductData()

            # Product category.
            self.set_product_category_first(product_data, soup)

            # Product image.
            self.set_product_image_third(product_data, product_ctx)

            # Sold-out status.
            self.set_product_soldout_first(product_data, product_ctx)

            # Product name / URL.
            crw_post_url = self.set_product_name_url_fourth(
                product_data, product_ctx, 'div', 'name')

            # Sale price. Sample markup:
            #   <p><span class="info displaynone"> / </span>
            #      <span class="price normal">19,900 won</span></p>
            amount_ctx = product_ctx.find('span', class_='price normal')
            if amount_ctx is not None:
                amount_text = amount_ctx.get_text().strip()
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(amount_text))

            # Only products for which a URL was found are passed on.
            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #5
0
ファイル: boondog.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listed product and pass it on.

        Category, image, sold-out flag and name/URL are delegated to helper
        methods on ``self``; the sale price is parsed from the ``<li>``
        rows under ``product_ctx``.

        Args:
            page_url: Listing page URL (not used by this implementation).
            soup: Parsed listing page, forwarded to the category helper.
            product_ctx: Element wrapping one product; must support
                ``find_all`` (presumably a BeautifulSoup tag).

        Returns:
            Always True; exceptions are logged via ``__LOG__`` and swallowed.
        """
        try:
            product_data = ProductData()

            # Product category.
            self.set_product_category_first(product_data, soup)

            # Product image.
            self.set_product_image_third(product_data, product_ctx)

            # Sold-out status.
            self.set_product_soldout_first(product_data, product_ctx)

            # Product name / URL: try the 'strong' tag first, then 'p'.
            crw_post_url = ''
            for tag_name in ('strong', 'p'):
                crw_post_url = self.set_product_name_url_second(
                    product_data, product_ctx, tag_name, 'name')
                if crw_post_url != '':
                    break

            # Sale price: for every row holding at least two <span>s, the text
            # of the first span up to any '(' is parsed. A later row overwrites
            # an earlier one.
            for li_ctx in product_ctx.find_all('li'):
                span_list = li_ctx.find_all('span')
                if len(span_list) < 2:
                    continue
                first_text = span_list[0].get_text().strip()
                value_str = first_text.split('(')[0].strip()
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(value_str))

            # Only products for which a URL was found are passed on.
            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #6
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listed product and pass it on.

        Category, image, sold-out flag and name/URL are delegated to helper
        methods on ``self``; the sale price is taken from the first
        ``<p class="price">`` under ``product_ctx``.

        Args:
            page_url: Listing page URL (not used by this implementation).
            soup: Parsed listing page, forwarded to the category helper.
            product_ctx: Element wrapping one product; must support
                ``find`` (presumably a BeautifulSoup tag).

        Returns:
            Always True; exceptions are logged via ``__LOG__`` and swallowed.
        """
        try:
            product_data = ProductData()

            # Product category.
            self.set_product_category_first(product_data, soup)

            # Product image.
            self.set_product_image_second(product_data, product_ctx)

            # Sold-out status.
            self.set_product_soldout_first(product_data, product_ctx)

            # Product name / URL: try the 'p' tag first, then 'strong'.
            crw_post_url = ''
            for tag_name in ('p', 'strong'):
                crw_post_url = self.set_product_name_url_fourth(
                    product_data, product_ctx, tag_name, 'name')
                if crw_post_url != '':
                    break

            # Sale price, e.g. <p class="price">KRW 46,000</p>.
            amount_ctx = product_ctx.find('p', class_='price')
            if amount_ctx is not None:
                amount_text = amount_ctx.get_text().strip()
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(amount_text))

            # Only products for which a URL was found are passed on.
            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #7
0
ファイル: janggeumi.py プロジェクト: hiadone/python_crawl
	def set_product_data(self , page_url, soup, product_ctx ) :
		"""Build a ProductData for one listed product and pass it on.

		Category, image, sold-out flag and name/URL are delegated to helper
		methods on ``self``; the sale price is taken from the first
		``<strong class="price">`` under ``product_ctx``.

		Args:
			page_url: Listing page URL (not used by this implementation).
			soup: Parsed listing page, forwarded to the category helper.
			product_ctx: Element wrapping one product; must support
				``find`` (presumably a BeautifulSoup tag).

		Returns:
			Always True; exceptions are logged via ``__LOG__`` and swallowed.
		"""
		try :
			product_data = ProductData()

			# Product category.
			self.set_product_category_third(product_data, soup)

			# Product image.
			self.set_product_image_fourth( product_data, product_ctx )

			# Sold-out status.
			self.set_product_soldout_first(product_data, product_ctx )

			# Product name / URL: try the 'strong' tag first, then 'p'.
			crw_post_url = ''
			for tag_name in ('strong', 'p') :
				crw_post_url = self.set_product_name_url_second( product_data, product_ctx , tag_name, 'name')
				if crw_post_url != '' :
					break

			# Sale price, e.g.
			#   <p><strike class="displaynone"></strike><strong class="price">25,000원</strong></p>
			strong_ctx = product_ctx.find('strong', class_='price')
			if strong_ctx is not None :
				price_text = strong_ctx.get_text().strip()
				product_data.crw_price_sale = int( __UTIL__.get_only_digit( price_text ))

			# Only products for which a URL was found are passed on.
			if crw_post_url != '' :
				self.set_product_url_hash( product_data, crw_post_url)

		except Exception as ex:
			__LOG__.Error('에러 : set_product_data')
			__LOG__.Error(ex)

		return True
コード例 #8
0
ファイル: babiana.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listed product and pass it on.

        Category, image, sold-out flag and name/URL are delegated to helper
        methods on ``self``; the sale price is taken from the first
        ``<p class="price">`` under ``product_ctx``.

        Args:
            page_url: Listing page URL (not used by this implementation).
            soup: Parsed listing page, forwarded to the category helper.
            product_ctx: Element wrapping one product; must support
                ``find`` (presumably a BeautifulSoup tag).

        Returns:
            Always True; exceptions are logged via ``__LOG__`` and swallowed.
        """
        try:
            product_data = ProductData()

            # Product category.
            self.set_product_category_third(product_data, soup)

            # Product image.
            self.set_product_image_first(product_data, product_ctx)

            # Sold-out status.
            self.set_product_soldout_first(product_data, product_ctx)

            # Product name / URL.
            crw_post_url = self.set_product_name_url_first(
                product_data, product_ctx, 'p', 'name')

            # Sale price from <p class="price">.
            amount_ctx = product_ctx.find('p', class_='price')
            if amount_ctx is not None:
                amount_text = amount_ctx.get_text().strip()
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(amount_text))

            # Only products for which a URL was found are passed on.
            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #9
0
ファイル: pup_son.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Collect the data of one listed product and hand it on for registration.

        Category, image, sold-out flag and name/URL are delegated to helper
        methods on ``self``. Brand, origin and prices are parsed from the
        ``<p rel="...">`` rows under ``product_ctx``; the ``rel`` value
        selects which ProductData field receives the row's value.

        Args:
            page_url: Listing page URL, forwarded to the category helper.
            soup: Parsed listing page, forwarded to the category helper.
            product_ctx: Element wrapping one product; must support
                ``find_all`` (presumably a BeautifulSoup tag - confirm with caller).

        Returns:
            Always True. Exceptions are logged through ``__LOG__`` and
            swallowed, so the return value does not signal success.
        """
        try:
            product_data = ProductData()

            # Product category.
            self.set_product_category_second(page_url, product_data, soup)

            # Product image.
            self.set_product_image_fourth(product_data, product_ctx)

            # Sold-out status.
            self.set_product_soldout_first(product_data, product_ctx)

            # Product name / URL.
            crw_post_url = self.set_product_name_url_first(
                product_data, product_ctx, 'div', '-name')

            # Detail rows. Each row is expected to read "<title> : <value> (<note>)":
            #   <p rel="판매가"><span class="title displaynone"><span>판매가</span> :</span>
            #   <span class="-real"><span>8,000원</span></span></p>
            for p_ctx in product_ctx.find_all('p'):
                if 'rel' not in p_ctx.attrs:
                    continue

                title_name = p_ctx.attrs['rel']
                split_list = p_ctx.get_text().strip().split(':')
                # A row without ':' has no value part. Indexing [1] used to
                # raise IndexError here, which aborted the whole product;
                # skip just this row instead.
                if len(split_list) < 2:
                    continue
                value_str = split_list[1].strip().split('(')[0].strip()

                if title_name.startswith('브랜드'):        # brand
                    product_data.crw_brand1 = value_str
                elif title_name.startswith('원산지'):      # country of origin
                    product_data.crw_brand2 = value_str
                elif title_name.startswith('소비자가'):    # consumer (list) price
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(value_str))
                elif title_name.startswith('판매가'):      # selling price
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(value_str))

            # Only products for which a URL was found are passed on.
            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #10
0
    def set_product_data(self, page_url, soup, img_ctx, name_ctx, price_ctx):
        """Build a ProductData from separate image, name and price elements.

        Args:
            page_url: Listing page URL (not used by this implementation).
            soup: Parsed listing page (not used by this implementation).
            img_ctx: Element searched for ``<img>`` tags for the product image.
            name_ctx: Element whose text is the product name and whose first
                ``<a>`` supplies the product link.
            price_ctx: Element holding the price / sold-out markup.
            All three must support ``find``/``find_all`` (presumably
            BeautifulSoup tags).

        Returns:
            Always True; exceptions are logged via ``__LOG__`` and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            # Product image: use the first <img> with a non-empty src, dropping
            # any query string. Sample:
            #   <img class="MS_prod_img_s" src="/shopimages/lovespet/0320020000533.gif?1590117644">
            for img_tag in img_ctx.find_all('img'):
                if 'src' not in img_tag.attrs:
                    continue
                img_src = img_tag.attrs['src'].strip().split('?')[0].strip()
                if img_src == '':
                    continue
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                product_data.product_img = self.get_hangul_url_convert(
                    img_link)
                break

            # Product name, link, goods code and category. Sample:
            #   <strong class="name"><a href="/shop/shopdetail.html?branduid=...">name</a></strong>
            product_data.crw_name = name_ctx.get_text().strip()
            product_link_ctx = name_ctx.find('a')
            if (product_link_ctx is not None
                    and 'href' in product_link_ctx.attrs):
                crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
                if crw_post_url != '':
                    self.get_crw_goods_code(product_data, crw_post_url)
                    self.get_category_value(product_data, crw_post_url)

            # Price / sold-out. Normal item:
            #   <p class="price02"><strike>₩24,000</strike></p>   (consumer price)
            #   <p class="price03">₩24,000</p>                    (selling price)
            # Sold-out item:
            #   <div class="sold">[품절상품]</div>
            sell_ctx = price_ctx.find('p', class_='price03')
            consumer_ctx = price_ctx.find('p', class_='price02')
            soldout_ctx = price_ctx.find('div', class_='sold')

            if soldout_ctx is not None:
                product_data.crw_is_soldout = 1

            if consumer_ctx is not None:
                consumer_text = consumer_ctx.get_text().strip()
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(consumer_text))

            if sell_ctx is not None:
                sell_text = sell_ctx.get_text().strip()
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_text))

            # Only products for which a URL was found are processed further.
            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #11
0
ファイル: flotshop.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listed product and pass it on.

        Category, image, sold-out flag and name/URL are delegated to helper
        methods on ``self``; regular and sale prices are parsed from the
        ``<li>`` rows under ``product_ctx``.

        Args:
            page_url: Listing page URL, forwarded to the category helper.
            soup: Parsed listing page, forwarded to the category helper.
            product_ctx: Element wrapping one product; must support
                ``find_all`` (presumably a BeautifulSoup tag).

        Returns:
            Always True; exceptions are logged via ``__LOG__`` and swallowed.
        """
        try:
            product_data = ProductData()

            # Product category.
            self.set_product_category_second(page_url, product_data, soup)

            # Product image.
            self.set_product_image_second(product_data, product_ctx)

            # Sold-out status.
            self.set_product_soldout_first(product_data, product_ctx)

            # Product name / URL: try the 'p' tag first, then 'strong'.
            crw_post_url = ''
            for tag_name in ('p', 'strong'):
                crw_post_url = self.set_product_name_url_first(
                    product_data, product_ctx, tag_name, 'name')
                if crw_post_url != '':
                    break

            # Prices. Sample rows:
            #   <li><strong class="title displaynone"><span>판매가</span> :</strong>
            #       <span style="...line-through;">39,000원</span>...</li>
            #   <li><strong class="title "><span></span> :</strong>
            #       <span>37,050원 <span>(1,950원 할인)</span></span></li>
            # The value is the text of the row's second <span>, cut at '('.
            for li_ctx in product_ctx.find_all('li'):
                strong_ctx = li_ctx.find('strong')
                span_list = li_ctx.find_all('span')
                if strong_ctx is None or len(span_list) < 2:
                    continue

                title_name = strong_ctx.get_text().strip()
                span_str = span_list[1].get_text().strip()
                value_str = span_str.split('(')[0].strip()

                if title_name.startswith('판매가') and title_name.find(':') > 0:
                    # A row titled "판매가 :" (selling price) is stored as
                    # the regular price.
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(value_str))
                elif ':' in title_name:
                    # Any other titled row counts as the sale price only when
                    # its value mentions a discount (할인) or an event (이벤트).
                    if '할인' in span_str or '이벤트' in span_str:
                        product_data.crw_price_sale = int(
                            __UTIL__.get_only_digit(value_str))

            # Only products for which a URL was found are passed on.
            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #12
0
ファイル: petnoriter.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listed product and process it.

        Image, link, name, brand, prices and sold-out flag are all parsed
        directly from ``product_ctx``.

        Args:
            page_url: Listing page URL (not used by this implementation).
            soup: Parsed listing page (not used by this implementation).
            product_ctx: Element wrapping one product; must support
                ``find_all`` (presumably a BeautifulSoup tag).

        Returns:
            Always True; exceptions are logged via ``__LOG__`` and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            # Image, product link, goods code and category. Sample:
            #   <div class="thumb salebox"> <a href="/shop/shopdetail.html?branduid=...">
            #   <img class="MS_prod_img_m" src="/shopimages/petnoriter/0020050000022.jpg?1590140914"></a> ... </div>
            for thumb_ctx in product_ctx.find_all('div',
                                                  class_='thumb salebox'):
                link_list = thumb_ctx.find_all('a')

                # First <img> with a non-empty src, query string removed.
                for img_ctx in thumb_ctx.find_all('img'):
                    if 'src' not in img_ctx.attrs:
                        continue
                    img_src = img_ctx.attrs['src'].strip().split('?')[0].strip()
                    if img_src == '':
                        continue
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)
                    break

                # First <a href> that yields a non-empty product URL.
                for link_ctx in link_list:
                    if 'href' not in link_ctx.attrs:
                        continue
                    crw_post_url = self.get_crw_post_url(link_ctx, 'href')
                    if crw_post_url != '':
                        self.get_crw_goods_code(product_data, crw_post_url)
                        self.get_category_value(product_data, crw_post_url)
                        break

            # Product name and brand, e.g. <li class="dsc">name</li>.
            for name_ctx in product_ctx.find_all('li', class_='dsc'):
                product_data.crw_name = name_ctx.get_text().strip()
                # A name may be prefixed with the brand in square brackets,
                # e.g. "[스텔라&츄이] 츄이스 치킨 디너패티".
                if product_data.crw_name.startswith('['):
                    bracket_parts = product_data.crw_name.split(']')
                    product_data.crw_brand1 = bracket_parts[0][1:].strip()

            # Price / sold-out. Sample:
            #   <ul class="info">
            #     <li class="price">28,900원</li>       (selling price)
            #     <li class="consumer">49,900원</li>    (consumer price)
            #   </ul>
            for ul_ctx in product_ctx.find_all('ul'):
                sell_ctx = ul_ctx.find('li', class_='price')
                consumer_ctx = ul_ctx.find('li', class_='consumer')
                soldout_ctx = ul_ctx.find('li', class_='soldout')

                if soldout_ctx is not None:
                    product_data.crw_is_soldout = 1

                if consumer_ctx is not None:
                    consumer_text = consumer_ctx.get_text().strip()
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(consumer_text))

                if sell_ctx is not None:
                    sell_text = sell_ctx.get_text().strip()
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sell_text))

            # Only products for which a URL was found are processed further.
            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #13
0
	def set_product_data(self , page_url, soup, product_ctx ) :
		"""Build a ProductData for one listed product and pass it on.

		Category, image and sold-out flag are delegated to helper methods on
		``self``. Name, URL and goods code come from the links inside
		``<div class="sp-product__title">``; prices and brand come from the
		``<div rel="...">`` rows under ``product_ctx``.

		Args:
			page_url: Listing page URL, forwarded to the category helper.
			soup: Parsed listing page, forwarded to the category helper.
			product_ctx: Element wrapping one product; must support
				``find_all`` (presumably a BeautifulSoup tag).

		Returns:
			Always True; exceptions are logged via ``__LOG__`` and swallowed.
		"""
		try :
			product_data = ProductData()
			crw_post_url = ''

			# Product category.
			self.set_product_category_second(page_url, product_data, soup)

			# Product image.
			self.set_product_image_first(product_data, product_ctx )

			# Sold-out status.
			self.set_product_soldout_first(product_data, product_ctx )

			# Product name, URL and goods code.
			for name_div_ctx in product_ctx.find_all('div', class_='sp-product__title') :
				for product_link_ctx in name_div_ctx.find_all('a') :
					if 'href' not in product_link_ctx.attrs :
						continue

					# The name is the text of a <span> that does not start with
					# the "상품명" (product name) label or with ':'.
					for span_ctx in product_link_ctx.find_all('span') :
						name_value = span_ctx.get_text().strip()
						if name_value.startswith('상품명') or name_value.startswith(':') :
							continue
						product_data.crw_name = name_value

					# Links not starting with "http" are prefixed with the base URL.
					href_value = product_link_ctx.attrs['href'].strip()
					tmp_product_link = href_value
					if not tmp_product_link.startswith('http') :
						tmp_product_link = '%s%s' % ( self.BASIC_PRODUCT_URL, href_value )

					if self.C_PRODUCT_STRIP_STR != '' :
						crw_post_url = tmp_product_link.replace( self.C_PRODUCT_STRIP_STR,'')
					else :
						crw_post_url = tmp_product_link

					# The 5th and 6th '/'-separated URL parts are used as fallback
					# name and goods code. A shorter URL raises IndexError, which
					# the handler below logs.
					split_list = crw_post_url.split('/')
					if product_data.crw_name == '' :
						product_data.crw_name = split_list[4].strip()
					product_data.crw_goods_code = split_list[5].strip()

			# Prices and brand. The row's rel attribute names the field:
			# 판매가 (selling price), 할인가 (discounted price), 브랜드 (brand).
			for div_ctx in product_ctx.find_all('div') :
				if 'rel' not in div_ctx.attrs :
					continue

				title_name = div_ctx.attrs['rel']
				for span_ctx in div_ctx.find_all('span') :
					span_value = span_ctx.get_text().strip()
					# Skip label spans and blank spans.
					if span_value in ('브랜드', '판매가', '할인가', '') :
						continue

					if title_name == '판매가' :
						product_data.crw_price = int( __UTIL__.get_only_digit( span_value ) )
					elif title_name == '할인가' :
						product_data.crw_price_sale = int( __UTIL__.get_only_digit( span_value ) )
					elif title_name == '브랜드' :
						product_data.crw_brand1 = span_value

			# Only products for which a URL was found are passed on.
			if crw_post_url != '' :
				self.set_product_url_hash( product_data, crw_post_url)

		except Exception as ex:
			__LOG__.Error('에러 : set_product_data')
			__LOG__.Error(ex)

		return True
コード例 #14
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listing node and register it.

        Category, image, sold-out flag and name/URL are delegated to helper
        methods; prices are read from the ``<li class="price_all">`` element.

        Args:
            page_url: Listing page URL (not used by this variant).
            soup: Parsed listing page; forwarded to the category helper.
            product_ctx: Node describing a single product (presumably a
                BeautifulSoup tag).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        # (span class, ProductData attribute) pairs, applied in this order.
        price_fields = (
            ('custom', 'crw_price'),
            ('pri', 'crw_price_sale'),
            ('strike', 'crw_price_sale'),
        )

        try:
            product_data = ProductData()
            crw_post_url = ''

            # Category, thumbnail and sold-out flag.
            self.set_product_category_first(product_data, soup)
            self.set_product_image_second(product_data, product_ctx)
            self.set_product_soldout_first(product_data, product_ctx)

            # Name / URL: try the <strong class="name"> layout first, then
            # fall back to <p class="name"> when no URL was produced.
            crw_post_url = self.set_product_name_url_second(
                product_data, product_ctx, 'strong', 'name')
            if crw_post_url == '':
                crw_post_url = self.set_product_name_url_second(
                    product_data, product_ctx, 'p', 'name')

            # Prices.
            price_box = product_ctx.find('li', class_='price_all')
            if price_box is not None:
                for css_class, field_name in price_fields:
                    for price_span in price_box.find_all('span',
                                                         class_=css_class):
                        digits = __UTIL__.get_only_digit(
                            price_span.get_text().strip())
                        setattr(product_data, field_name, int(digits))

            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #15
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listing node and register it.

        Category, image and name/URL are delegated to helper methods. Brand,
        maker and prices are read from ``<li>`` rows of the form
        ``<strong>label</strong> <span>value</span>``, where the value is the
        second ``<span>`` of the row.

        Args:
            page_url: Listing page URL; forwarded to the category helper.
            soup: Parsed listing page; forwarded to the category helper.
            product_ctx: Node describing a single product (presumably a
                BeautifulSoup tag).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.set_product_category_second(page_url, product_data, soup)
            self.set_product_image_fourth(product_data, product_ctx)

            # Name / URL: <p class="name"> first, <strong class="name"> as
            # the fallback when no URL was produced.
            crw_post_url = self.set_product_name_url_first(
                product_data, product_ctx, 'p', 'name')
            if crw_post_url == '':
                crw_post_url = self.set_product_name_url_first(
                    product_data, product_ctx, 'strong', 'name')

            # Label/value rows, e.g.
            #   <li><strong><span>소비자가</span> :</strong> <span>72,800원</span></li>
            #   <li><strong><span>판매가</span> :</strong> <span>품절</span></li>
            for row in product_ctx.find_all('li'):
                label_tag = row.find('strong')
                row_spans = row.find_all('span')
                if label_tag is None or len(row_spans) <= 1:
                    continue

                label = label_tag.get_text().strip()
                # Anything from the first '(' onwards is dropped.
                value = row_spans[1].get_text().strip().partition('(')[0].strip()

                if label.startswith('브랜드'):
                    product_data.crw_brand1 = value
                elif label.startswith('제조사'):
                    product_data.crw_brand2 = value
                elif label.startswith('소비자가'):
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(value))
                elif label.startswith('판매가'):
                    if value == '품절':
                        # Sold out: no sale price shown, reuse the list price.
                        product_data.crw_is_soldout = 1
                        product_data.crw_price_sale = product_data.crw_price
                    else:
                        product_data.crw_price_sale = int(
                            __UTIL__.get_only_digit(value))

            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #16
0
ファイル: ecofoam.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listing node and submit it.

        Reads image, product link, name, brand and prices from the product
        node, then calls set_product_data_sub and process_product_api when a
        product URL was found.

        Args:
            page_url: Listing page URL (not used by this variant).
            soup: Parsed listing page (not used by this variant).
            product_ctx: Node describing a single product (presumably a
                BeautifulSoup tag).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            ####################################
            # Product image
            #
            # <dd class="prd-img"><img class="MS_prod_img_s" src="/shopimages/ecofoam/0450010000053.jpg?1527467204" ...></dd>
            ####################################
            for img_div_ctx in product_ctx.find_all('dd', class_='prd-img'):
                for img_ctx in img_div_ctx.find_all('img'):
                    img_src = ''
                    if 'src' in img_ctx.attrs:
                        # Drop the cache-busting query string after '?'.
                        img_src = img_ctx.attrs['src'].strip().split(
                            '?')[0].strip()

                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)
                        # First usable image of this <dd> wins.
                        break

            ####################################
            # Product link -> goods code and category
            #
            # <a href="/shop/shopdetail.html?branduid=841206&amp;xcode=046&amp;mcode=004&amp;...">
            ####################################
            product_link_ctx = product_ctx.find('a')
            if (product_link_ctx is not None
                    and 'href' in product_link_ctx.attrs):
                crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
                if crw_post_url != '':
                    self.get_crw_goods_code(product_data, crw_post_url)
                    self.get_category_value(product_data, crw_post_url)

            ####################################
            # Product name and brand
            #
            # <span class="prd-name ft_eb">도그자리 플랫<br></span>
            # sold out: <span class="prd-name ft_eb">맘편한매트 소프트W<br>8세트(품절)</span>
            # brand:    <span class="prd-brand">도그자리</span>
            ####################################
            name_ctx = product_ctx.find('span', class_='prd-name ft_eb')
            if name_ctx is not None:
                crw_name = name_ctx.get_text().strip()
                # Substring test instead of `0 < find(...)`: the old check
                # missed a '(품절)' marker at the very start of the name.
                if '(품절)' in crw_name:
                    product_data.crw_is_soldout = 1
                    crw_name = crw_name.replace('(품절)', '').strip()

                product_data.crw_name = crw_name

            brand_ctx = product_ctx.find('span', class_='prd-brand')
            if brand_ctx is not None:
                product_data.crw_brand1 = brand_ctx.get_text().strip()

            ####################################
            # Prices
            #
            # <span class="prd-price-discount"><del>75,000</del></span>
            # <span class="prd-discount ft_eb">52,000&nbsp;원</span>
            ####################################
            for div_ctx in product_ctx.find_all('div', class_='prd-sub'):
                sell_ctx = div_ctx.find('span', class_='prd-discount ft_eb')
                consumer_ctx = div_ctx.find('span',
                                            class_='prd-price-discount')

                if consumer_ctx is not None:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            consumer_ctx.get_text().strip()))

                if sell_ctx is not None:
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #17
0
ファイル: dontcrymypet.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listing node and register it.

        Category, image, sold-out flag and name/URL are delegated to helper
        methods; prices are read from ``<p class="price">`` elements.

        Args:
            page_url: Listing page URL (not used by this variant).
            soup: Parsed listing page; forwarded to the category helper.
            product_ctx: Node describing a single product (presumably a
                BeautifulSoup tag).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            # Category, thumbnail and sold-out flag.
            self.set_product_category_first(product_data, soup)
            self.set_product_image_second(product_data, product_ctx)
            self.set_product_soldout_first(product_data, product_ctx)

            # Name / URL, e.g.
            #   <p class="name"><a href="/product/detail.html?product_no=286&amp;...">
            #     <strong class="title displaynone">...</strong> <span>Name</span></a></p>
            # The 'strong' layout is tried first, 'p' is the fallback.
            crw_post_url = self.set_product_name_url_first(
                product_data, product_ctx, 'strong', 'name')
            if crw_post_url == '':
                crw_post_url = self.set_product_name_url_first(
                    product_data, product_ctx, 'p', 'name')

            # Prices, e.g.
            #   <p class="price"><span><strike>33,000원</strike> &gt;
            #     <span><strong>17,000원</strong></span></span>...</p>
            # <strong> holds the sale price, <strike> the original price.
            for price_tag in product_ctx.find_all('p', class_='price'):
                sale_tag = price_tag.find('strong')
                list_tag = price_tag.find('strike')
                if sale_tag is not None:
                    sale_text = sale_tag.get_text().strip()
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sale_text))
                if list_tag is not None:
                    list_text = list_tag.get_text().strip()
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(list_text))

            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #18
0
	def set_product_data(self, page_url, soup, product_ctx):
		"""Build a ProductData for one listing node and submit it.

		Reads brand, image, product link, name and prices from the product
		node, then calls set_product_data_sub and process_product_api when a
		product URL was found.

		Args:
			page_url: Listing page URL; forwarded to get_category_value.
			soup: Parsed listing page; forwarded to get_category_value.
			product_ctx: Node describing a single product (presumably a
				BeautifulSoup tag).

		Returns:
			Always True. Any exception is logged and swallowed.
		"""
		try:
			product_data = ProductData()
			crw_post_url = ''

			self.reset_product_category(product_data)
			self.get_category_value(product_data, page_url, soup)

			# Brand: <div class="brand">SALLYS LAW</div> (last match wins).
			for brand_div in product_ctx.find_all('div', class_='brand'):
				product_data.crw_brand1 = brand_div.get_text().strip()

			# Image:
			#   <div class="img"><img src="//image.wconcept.co.kr/.../300972496.jpg?RS=300" alt=""></div>
			# 'data-original' is preferred over 'src'; the query string is dropped.
			for img_div in product_ctx.find_all('div', class_='img'):
				for img_tag in img_div.find_all('img'):
					if 'data-original' in img_tag.attrs:
						raw_src = img_tag.attrs['data-original'].strip()
					elif 'src' in img_tag.attrs:
						raw_src = img_tag.attrs['src'].strip()
					else:
						raw_src = ''

					img_src = raw_src.partition('?')[0].strip()
					if img_src != '':
						img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
						product_data.product_img = self.get_hangul_url_convert(img_link)

			# NOTE: sold-out detection (a 'soldout' image inside
			# <div class="item_icon_box">) is disabled in this crawler.

			# Product link and goods code: <a href="/Product/300972496">
			first_anchor = product_ctx.find('a')
			if first_anchor is not None and 'href' in first_anchor.attrs:
				href = first_anchor.attrs['href'].strip()
				full_link = href
				if not href.startswith('http'):
					# Relative link: prefix the base product URL.
					full_link = '%s%s' % (self.BASIC_PRODUCT_URL, first_anchor.attrs['href'].strip())

				crw_post_url = full_link
				if self.C_PRODUCT_STRIP_STR != '':
					crw_post_url = full_link.replace(self.C_PRODUCT_STRIP_STR, '')

				# Segment 4 of the '/'-split URL is the goods code.
				product_data.crw_goods_code = crw_post_url.split('/')[4].strip()

			# Product name (last match wins).
			for name_div in product_ctx.find_all('div', class_='product ellipsis multiline'):
				product_data.crw_name = name_div.get_text().strip()

			# Prices:
			#   <div class="price">
			#     <span class="discount_price">74,400</span>
			#     <span class="base_price">93,000</span>
			#     <span class="discount_rate">20%</span>
			#   </div>
			for price_div in product_ctx.find_all('div', class_='price'):
				for price_span in price_div.find_all('span'):
					if 'class' not in price_span.attrs:
						continue
					first_class = price_span.attrs['class'][0]
					if first_class == 'base_price':
						product_data.crw_price = int(__UTIL__.get_only_digit(price_span.get_text().strip()))
					elif first_class == 'discount_price':
						product_data.crw_price_sale = int(__UTIL__.get_only_digit(price_span.get_text().strip()))

			if crw_post_url != '':
				self.set_product_data_sub(product_data, crw_post_url)
				self.process_product_api(product_data)

		except Exception as ex:
			__LOG__.Error('에러 : set_product_data')
			__LOG__.Error(ex)

		return True
コード例 #19
0
ファイル: pethroom.py プロジェクト: hiadone/python_crawl
    def set_product_data_second(self, page_url, soup):
        """Build a ProductData from a product detail page and register it.

        The goods code comes from the ``product_no`` query parameter of
        ``page_url``, categories from ``self.PAGE_URL_HASH[page_url]``, and
        image, name, sold-out flag and prices from the parsed page.

        Args:
            page_url: URL of the product page; must contain '?product_no='.
            soup: Parsed product page (presumably a BeautifulSoup object).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()

            # Goods code: text after '?product_no=' up to the next '&'.
            crw_post_url = page_url
            after_marker = crw_post_url.split('?product_no=')[1].strip()
            product_data.crw_goods_code = after_marker.split('&')[0].strip()

            # Categories: level 1 is fixed, levels 2 and 3 are the first two
            # '|'-separated parts of the stored page category string.
            product_data.crw_category1 = 'PRODUCT'
            category_parts = self.PAGE_URL_HASH[page_url].split('|')
            for field_name, part in zip(('crw_category2', 'crw_category3'),
                                        category_parts):
                setattr(product_data, field_name, part.strip())

            # Image: the first non-empty 'BigImage' source is kept.
            for img_tag in soup.find_all('img', class_='BigImage'):
                if 'src' not in img_tag.attrs:
                    continue
                img_src = img_tag.attrs['src'].strip()
                if img_src == '':
                    continue
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                if product_data.product_img == '':
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

            # Name: first <p class="prd_model"> wins.
            for model_tag in soup.find_all('p', class_='prd_model'):
                if product_data.crw_name == '':
                    product_data.crw_name = model_tag.get_text().strip()

            # Sold out when the 'btnReserve' span exists and its first class
            # is not 'displaynone' (or it has no class attribute at all).
            reserve_btn = soup.find('span', {'id': 'btnReserve'})
            if reserve_btn is not None:
                if ('class' not in reserve_btn.attrs
                        or reserve_btn.attrs['class'][0] != 'displaynone'):
                    product_data.crw_is_soldout = 1

            # Prices.
            for price_box in soup.find_all('div', class_='info_price'):
                sale_tag = price_box.find('span', class_='sell')
                list_tag = price_box.find('span', class_='customer')
                if sale_tag is not None:
                    sale_text = sale_tag.get_text().strip()
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sale_text))
                if list_tag is not None:
                    list_text = list_tag.get_text().strip()
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(list_text))

            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #20
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listing node and submit it.

        Reads categories, image, sold-out flag, name, product link and prices,
        then calls set_product_data_sub and process_product_api when a
        product URL was found.

        Args:
            page_url: Listing page URL; key into ``self.PAGE_URL_HASH``.
            soup: Parsed listing page (not used by this variant).
            product_ctx: Node describing a single product (presumably a
                BeautifulSoup tag).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            # Categories: up to three '|'-separated levels stored per page
            # URL, only when a detail category value is configured.
            if self.C_DETAIL_CATEGORY_VALUE.strip() != '':
                category_parts = self.PAGE_URL_HASH[page_url].split('|')
                category_fields = ('crw_category1', 'crw_category2',
                                   'crw_category3')
                for field_name, part in zip(category_fields, category_parts):
                    setattr(product_data, field_name, part)

            # NOTE: category extraction from <div class="sub_title_txt"> and
            # brand extraction from <span class="item_brand"> are disabled
            # in this crawler.

            # Image: first <img> inside each <div class="prdimg">.
            for img_div in product_ctx.find_all('div', class_='prdimg'):
                img_tag = img_div.find('img')
                if img_tag is None:
                    continue
                img_src = ''
                if 'src' in img_tag.attrs:
                    img_src = img_tag.attrs['src'].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

            # Sold out when the product node's own first class is 'soldout'.
            if 'class' in product_ctx.attrs:
                if product_ctx.attrs['class'][0] == 'soldout':
                    product_data.crw_is_soldout = 1

            # Name, link and goods code:
            #   <p class="name"><a href="https://.../shop/detail.php?pno=BEED...&amp;rURL=...">DINING SET 1P OLIVE</a></p>
            for name_tag in product_ctx.find_all('p', class_='name'):
                anchor = name_tag.find('a')
                if anchor is None or 'href' not in anchor.attrs:
                    continue

                product_data.crw_name = anchor.get_text().strip()

                href = anchor.attrs['href'].strip()
                full_link = href
                if not href.startswith('http'):
                    # Relative link: prefix the base product URL.
                    full_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                          anchor.attrs['href'].strip())

                crw_post_url = full_link
                if self.C_PRODUCT_STRIP_STR != '':
                    crw_post_url = full_link.replace(
                        self.C_PRODUCT_STRIP_STR, '')

                # Goods code: text after '?pno=' up to the next '&'.
                after_marker = crw_post_url.split('?pno=')[1]
                product_data.crw_goods_code = after_marker.split(
                    '&')[0].strip()

            # Prices:
            #   <div class="price">
            #     <p class="consumer consumer">KRW 24,000</p>
            #     <p class="sell sell"><strong>KRW 22,800 </strong></p>
            #   </div>
            for price_div in product_ctx.find_all('div', class_='price'):
                sale_tag = price_div.find('p', class_='sell')
                if sale_tag is not None:
                    sale_text = sale_tag.get_text().strip()
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sale_text))
                list_tag = price_div.find('p', class_='consumer')
                if list_tag is not None:
                    list_text = list_tag.get_text().strip()
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(list_text))

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #21
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData for one listing node and register it.

        Category, image, sold-out flag and name/URL are delegated to helper
        methods. Prices are told apart by the inline style of the first
        ``<span>`` in each ``<li>``: 'line-through;' marks the original price
        and 'bold;' the sale price.

        Args:
            page_url: Listing page URL (not used by this variant).
            soup: Parsed listing page; forwarded to the category helper.
            product_ctx: Node describing a single product (presumably a
                BeautifulSoup tag).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            # Category, thumbnail and sold-out flag.
            self.set_product_category_first(product_data, soup)
            self.set_product_image_third(product_data, product_ctx)
            self.set_product_soldout_first(product_data, product_ctx)

            # Name / URL: 'strong' layout first, 'p' as the fallback.
            crw_post_url = self.set_product_name_url_second(
                product_data, product_ctx, 'strong', 'name')
            if crw_post_url == '':
                crw_post_url = self.set_product_name_url_second(
                    product_data, product_ctx, 'p', 'name')

            # Prices, e.g.
            #   <li class=" xans-record-"><span style="font-size:15px;color:#000000;font-weight:bold;">4,900원</span>...</li>
            for row in product_ctx.find_all('li'):
                first_span = row.find('span')
                if first_span is None:
                    continue

                price_text = first_span.get_text().strip()
                # Only styled spans whose text contains '원' are prices.
                if 'style' not in first_span.attrs or '원' not in price_text:
                    continue

                inline_style = first_span.attrs['style']
                if inline_style.find('line-through;') > 0:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(price_text))
                elif inline_style.find('bold;') > 0:
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(price_text))

            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #22
0
ファイル: cocochien.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):

        #
        #
        try:
            product_data = ProductData()
            crw_post_url = ''

            ####################################
            # 상품 이미지 확인
            # 상품 링크 정보 및 상품코드
            # 카테고리
            #
            # <div class="thumb salebox">
            # <a href="/shop/shopdetail.html?branduid=3534594&amp;xcode=003&amp;mcode=001&amp;scode=&amp;type=X&amp;sort=manual&amp;cur_code=003&amp;GfDT=aWt3UQ%3D%3D"><img class="MS_prod_img_m" src="/shopimages/cocochien/0030010000152.jpg?1581790516" alt="상품 섬네일"></a>
            # <input type="hidden" name="custom_price" value="0">
            # <input type="hidden" name="product_price" value="34500">
            # <div id="sale_bg" style="display: none;"><span class="sale_text"></span></div>
            # <div class="info_icon">
            # <span class="m_quickview"><a class="btn-overlay-show" href="javascript:viewdetail('003001000015', '1', '');"><img src="/design/cocochien/0746amelie/info_icon02.gif"></a></span>										<span class="m_option"><img src="/shopimages/cocochien/bt_opt_preview.gif" onclick="javascript:mk_prd_option_preview('3534594',event);"></span>									</div><!-- //info_icon -->
            # </div>
            ####################################

            img_div_list = product_ctx.find_all('div', class_='thumb salebox')
            for img_div_ctx in img_div_list:
                product_link_list = img_div_ctx.find_all('a')
                img_list = img_div_ctx.find_all('img')
                for img_ctx in img_list:
                    img_src = ''
                    if ('src' in img_ctx.attrs):
                        split_list = img_ctx.attrs['src'].strip().split('?')
                        img_src = split_list[0].strip()

                    if (img_src != ''):
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)
                        break

                for product_link_ctx in product_link_list:
                    if ('href' in product_link_ctx.attrs):
                        crw_post_url = self.get_crw_post_url(
                            product_link_ctx, 'href')
                        if (crw_post_url != ''):
                            self.get_crw_goods_code(product_data, crw_post_url)
                            self.get_category_value(product_data, crw_post_url)
                            break

            ####################################
            # 상품명 및 브랜드
            # <li class="dsc">앨리스튜튜</li>
            ####################################
            name_strong_list = product_ctx.find_all('li', class_='dsc')
            for name_strong_ctx in name_strong_list:
                product_data.crw_name = name_strong_ctx.get_text().strip()
                #
                # 이름 앞에 브랜드명이 있음.
                # [스텔라&츄이] 츄이스 치킨 디너패티
                if (0 == product_data.crw_name.find('[')):
                    brand_list = product_data.crw_name.split(']')
                    product_data.crw_brand1 = brand_list[0][1:].strip()

            ####################################
            # 가격 / 품절 여부 확인
            #
            #<ul class="info">
            # <li class="dsc">네이비도트원피스(50%SALE)SM,XL주문가능</li>
            # <li class="subname"></li>
            # <li class="consumer">26,000원</li>										<li class="price">13,000원</li>
            # <li class="icon"><span class="MK-product-icons"></span></li>
            # </ul>
            #
            #---------- 품절시 --------------------
            # <ul class="info">
            #	<li class="dsc">마카롱나시원피스(50%SALE)</li>
            #	<li class="subname"></li>
            #	<li class="soldout">SOLD OUT</li>
            #	<li class="icon"><span class="MK-product-icons"></span></li>
            #	</ul>
            ####################################

            div_list = product_ctx.find_all('ul')
            for div_ctx in div_list:
                sell_ctx = div_ctx.find('li', class_='price')
                consumer_ctx = div_ctx.find('li', class_='consumer')
                soldout_ctx = div_ctx.find('li', class_='soldout')
                if (soldout_ctx != None): product_data.crw_is_soldout = 1

                if (consumer_ctx != None):
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            consumer_ctx.get_text().strip()))

                if (sell_ctx != None):
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

            if (crw_post_url != ''):
                #if( self.PRODUCT_URL_HASH.get( crw_post_url , -1) == -1) :

                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

                rtn = True

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)
            pass

        return True
コード例 #23
0
ファイル: howlpot.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData from one listing item and submit it.

        Categories are taken from ``self.PAGE_URL_HASH[page_url]`` (a
        '|'-separated string); image, sold-out flag, name, link, goods code
        and prices are read from ``product_ctx``.  When a product link was
        found, the record is passed to ``set_product_data_sub`` and
        ``process_product_api``.

        Args:
            page_url: Key into ``self.PAGE_URL_HASH`` for the current page.
            soup: Not used by this implementation.
            product_ctx: Node for a single product; it is searched with
                ``find_all`` and its ``attrs`` are read (presumably a
                BeautifulSoup Tag -- confirm against the caller).

        Returns:
            Always True.  Any exception is logged through ``__LOG__.Error``
            and swallowed, so a failure only drops the current product.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            ####################################
            # Category: up to three '|'-separated levels registered for
            # this page URL.
            ####################################
            category_path = self.PAGE_URL_HASH[page_url]
            __LOG__.Trace(category_path)

            category_attrs = ('crw_category1', 'crw_category2',
                              'crw_category3')
            for attr_name, category_name in zip(category_attrs,
                                                category_path.split('|')):
                setattr(product_data, attr_name, category_name)

            ####################################
            # Product image
            #
            # <div class="prdimg"><a href="...detail.php?pno=..."><img src="..."></a></div>
            ####################################
            for img_div_ctx in product_ctx.find_all('div', class_='prdimg'):
                img_ctx = img_div_ctx.find('img')
                if img_ctx is None or 'src' not in img_ctx.attrs:
                    continue

                img_src = img_ctx.attrs['src'].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

            ####################################
            # Sold-out flag
            #
            # A sold-out item is rendered as <div class="box out">, i.e. the
            # product node carries exactly two classes, the second being
            # 'out'.
            ####################################
            if 'class' in product_ctx.attrs:
                class_name_list = product_ctx.attrs['class']
                if (len(class_name_list) == 2
                        and class_name_list[1] == 'out'):
                    product_data.crw_is_soldout = 1

            ####################################
            # Product link, name and goods code
            #
            # <div class="name">
            #   <a href="...detail.php?pno=<CODE>&amp;rURL=...">product name</a>
            #   <span class="wish"><a href="#" ...>add to wish list</a></span>
            # </div>
            ####################################
            for name_div_ctx in product_ctx.find_all('div', class_='name'):
                product_link_ctx = name_div_ctx.find('a')
                if (product_link_ctx is None
                        or 'href' not in product_link_ctx.attrs):
                    continue

                product_data.crw_name = product_link_ctx.get_text().strip()

                # Relative links are prefixed with the shop's base URL.
                product_link = product_link_ctx.attrs['href'].strip()
                if not product_link.startswith('http'):
                    product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                             product_link)
                crw_post_url = product_link

                if self.C_PRODUCT_STRIP_STR != '':
                    crw_post_url = product_link.replace(
                        self.C_PRODUCT_STRIP_STR, '')

                # The goods code is the value of the leading 'pno' query
                # parameter.  A link without '?pno=' used to raise
                # IndexError here and drop the whole product; now the code
                # is simply left unset.
                url_parts = crw_post_url.split('?pno=')
                if len(url_parts) > 1:
                    product_data.crw_goods_code = url_parts[1].split(
                        '&')[0].strip()

            ####################################
            # Prices
            #
            # <div class="price">
            #   <span class="sell"><span class="font">98,000</span></span>
            # </div>
            ####################################
            for price_div_ctx in product_ctx.find_all('div', class_='price'):
                sell_ctx = price_div_ctx.find('span', class_='sell')
                consumer_ctx = price_div_ctx.find('span', class_='consumer')

                if consumer_ctx is not None:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            consumer_ctx.get_text().strip()))

                if sell_ctx is not None:
                    # During a time sale extra text follows the price on
                    # later lines; only the first line holds the amount.
                    sale_lines = sell_ctx.get_text().strip().split('\n')
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sale_lines[0].strip()))

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #24
0
ファイル: studioalive.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Collect one product's fields and register it by URL.

        Category, image, sold-out state and name/URL are delegated to the
        ``set_product_*`` helpers; the list and sale prices are parsed here
        from the ``price`` / ``saleprice`` divs of ``product_ctx``.

        Args:
            page_url: Not used by this implementation.
            soup: Passed to ``set_product_category_third``.
            product_ctx: Node for a single product, searched with
                ``find_all``.

        Returns:
            Always True; exceptions are logged via ``__LOG__.Error`` and
            swallowed.
        """
        try:
            item = ProductData()

            # Category, thumbnail and sold-out state come from shared helpers.
            self.set_product_category_third(item, soup)
            self.set_product_image_fourth(item, product_ctx)
            self.set_product_soldout_first(item, product_ctx)

            # Name and detail-page URL are read from <div class="name">.
            post_url = self.set_product_name_url_first(
                item, product_ctx, 'div', 'name')

            # List price: <div class="price ..."> holds the struck-through
            # amount.  With several matches the last one wins.
            for list_price_div in product_ctx.find_all('div', class_='price'):
                list_price_text = list_price_div.get_text().strip()
                item.crw_price = int(
                    __UTIL__.get_only_digit(list_price_text))

            # Sale price: several <div class="saleprice"> siblings exist
            # (discount-period tooltip, colour chips, ...).  Only the one
            # WITHOUT a nested <div> carries the amount, optionally followed
            # by a parenthesised discount note that is cut off at '('.
            for sale_div in product_ctx.find_all('div', class_='saleprice'):
                if sale_div.find('div') is not None:
                    continue
                amount_text = sale_div.get_text().strip().split('(')[0]
                item.crw_price_sale = int(
                    __UTIL__.get_only_digit(amount_text.strip()))

            if post_url != '':
                self.set_product_url_hash(item, post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #25
0
ファイル: oraeorae.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData from one listing item and submit it.

        Image, link, goods code and category are read from the
        ``<dt class="thumb">`` cell, name and brand from
        ``<li class="prd-name">``, price and sold-out state from
        ``<li class="prd-price">``.  When a product link was found the
        record is passed to ``set_product_data_sub`` and
        ``process_product_api``.

        Args:
            page_url: Not used by this implementation.
            soup: Not used by this implementation.
            product_ctx: Node for a single product, searched with
                ``find_all``.

        Returns:
            Always True.  Any exception is logged through ``__LOG__.Error``
            and swallowed, so a failure only drops the current product.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            ####################################
            # Product image, product link, goods code and category
            #
            # <dt class="thumb"><a href="/shop/shopdetail.html?branduid=...&amp;xcode=...">
            #   <img class="MS_prod_img_s" src="/shopimages/.../xxx.jpg?1581494094"></a></dt>
            ####################################
            for thumb_ctx in product_ctx.find_all('dt', class_='thumb'):
                product_link_list = thumb_ctx.find_all('a')

                for img_ctx in thumb_ctx.find_all('img'):
                    img_src = ''
                    if 'src' in img_ctx.attrs:
                        # Drop the cache-busting query string.
                        img_src = img_ctx.attrs['src'].strip().split(
                            '?')[0].strip()

                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)
                        break

                # The first link that yields a non-empty URL wins.
                for product_link_ctx in product_link_list:
                    if 'href' in product_link_ctx.attrs:
                        crw_post_url = self.get_crw_post_url(
                            product_link_ctx, 'href')
                        if crw_post_url != '':
                            self.get_crw_goods_code(product_data, crw_post_url)
                            self.get_category_value(product_data, crw_post_url)
                            break

            ####################################
            # Product name and brand
            #
            # <li class="prd-name">[Brand] product name <span class="MK-product-icons"></span></li>
            ####################################
            for name_ctx in product_ctx.find_all('li', class_='prd-name'):
                product_data.crw_name = name_ctx.get_text().strip()
                # A leading "[...]" in the name is the brand.
                if product_data.crw_name.startswith('['):
                    brand_part = product_data.crw_name.split(']')[0]
                    product_data.crw_brand1 = brand_part[1:].strip()

            ####################################
            # Price / sold-out state
            #
            # <li class="prd-price">74,800(won)</li>
            #
            # ---- when sold out -------
            # <li class="prd-price">
            #   <span class="fc-red">(sold-out label)</span>
            # </li>
            ####################################
            for ul_ctx in product_ctx.find_all('ul'):
                price_ctx = ul_ctx.find('li', class_='prd-price')
                if price_ctx is None:
                    continue

                # Detect sold-out BEFORE parsing the price: in the sold-out
                # markup the price cell contains only a label, and a failed
                # conversion must not prevent the flag from being recorded.
                is_soldout = ul_ctx.find('span', class_='fc-red') is not None
                if is_soldout:
                    product_data.crw_is_soldout = 1

                price_text = price_ctx.get_text().strip()
                try:
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(price_text))
                except (TypeError, ValueError):
                    # NOTE(review): assumes get_only_digit returns a value
                    # int() rejects when the text has no digits -- confirm.
                    # Only a sold-out item may legitimately lack a price;
                    # otherwise keep the previous behaviour and let the
                    # outer handler log the failure.
                    if not is_soldout:
                        raise

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #26
0
ファイル: pupping.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Collect one product's fields and register it by URL.

        Category, image, sold-out state and name/URL are delegated to the
        ``set_product_*`` helpers; prices are parsed here from the ``<li>``
        rows of ``product_ctx``.

        Args:
            page_url: Passed to ``set_product_category_second``.
            soup: Passed to ``set_product_category_second``.
            product_ctx: Node for a single product, searched with
                ``find_all``.

        Returns:
            Always True; exceptions are logged via ``__LOG__.Error`` and
            swallowed.
        """
        try:
            item = ProductData()

            # Category, thumbnail and sold-out state come from shared helpers.
            self.set_product_category_second(page_url, item, soup)
            self.set_product_image_fourth(item, product_ctx)
            self.set_product_soldout_first(item, product_ctx)

            # Name and detail-page URL are read from <div class="item_name">.
            post_url = self.set_product_name_url_fifth(
                item, product_ctx, 'div', 'item_name')

            # Each price row looks like
            #   <li><span class="title"><span>LABEL</span> :</span> <span>AMOUNT</span>...</li>
            # so find_all('span') yields [title wrapper, label, amount, ...].
            # Rows with fewer than three spans are not price rows.
            for row in product_ctx.find_all('li'):
                spans = row.find_all('span')
                if len(spans) <= 2:
                    continue

                label = spans[1].get_text().strip()
                # Anything from '(' onwards is a note, not the amount.
                amount = spans[2].get_text().strip().split('(')[0].strip()

                if label.startswith('소비자가'):
                    # List-price row.
                    item.crw_price = int(__UTIL__.get_only_digit(amount))
                elif label.startswith('판매가'):
                    # Sale-price row.
                    item.crw_price_sale = int(
                        __UTIL__.get_only_digit(amount))

            if post_url != '':
                self.set_product_url_hash(item, post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #27
0
ファイル: dfang.py プロジェクト: hiadone/python_crawl
    def set_product_data(self, page_url, soup, product_ctx):
        """Collect one product's fields and register it by URL.

        Categories are taken from ``self.PAGE_URL_HASH[page_url]`` (a
        '|'-separated string) and mirrored into ``self.CRW_CATEGORY_1..3``.
        Image and sold-out state are delegated to helpers; link, goods code,
        name and prices are parsed from ``product_ctx``.

        Args:
            page_url: Key into ``self.PAGE_URL_HASH`` for the current page.
            soup: Not used by this implementation.
            product_ctx: Node for a single product, searched with
                ``find_all``.

        Returns:
            Always True; exceptions are logged via ``__LOG__.Error`` and
            swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            # Reset the instance-level categories first so they stay empty
            # if the lookup below fails.
            self.CRW_CATEGORY_1 = ''
            self.CRW_CATEGORY_2 = ''
            self.CRW_CATEGORY_3 = ''

            # Category: up to three '|'-separated levels for this page URL.
            category_attrs = ('crw_category1', 'crw_category2',
                              'crw_category3')
            category_names = self.PAGE_URL_HASH[page_url].split('|')
            for attr_name, category_name in zip(category_attrs,
                                                category_names):
                setattr(product_data, attr_name, category_name)

            self.CRW_CATEGORY_1 = product_data.crw_category1
            self.CRW_CATEGORY_2 = product_data.crw_category2
            self.CRW_CATEGORY_3 = product_data.crw_category3

            # Product image
            self.set_product_image_fourth(product_data, product_ctx)

            # Sold-out state
            self.set_product_soldout_first(product_data, product_ctx)

            # Product link and goods code, taken from the image cell.
            img_div_list = product_ctx.find_all(
                'div', class_=self.C_PRODUCT_IMG_SELECTOR_CLASSNAME)

            for img_div_ctx in img_div_list:
                product_link_ctx = img_div_ctx.find('a')
                if (product_link_ctx is None
                        or 'href' not in product_link_ctx.attrs):
                    continue

                crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
                if crw_post_url != '':
                    # The goods code is the sixth '/'-separated URL segment,
                    # when the URL is long enough to have one.
                    url_segments = crw_post_url.split('/')
                    if len(url_segments) > 5:
                        product_data.crw_goods_code = url_segments[5].strip()

            for name_div_ctx in product_ctx.find_all('div',
                                                     class_='description'):
                # Product name: the span whose text is neither the label
                # nor the ':' separator nor empty.
                for name_strong_ctx in name_div_ctx.find_all('strong',
                                                             class_='name'):
                    for product_link_ctx in name_strong_ctx.find_all('a'):
                        for span_ctx in product_link_ctx.find_all('span'):
                            name_value = span_ctx.get_text().strip()
                            if (name_value != ''
                                    and not name_value.startswith('상품명')
                                    and not name_value.startswith(':')):
                                product_data.crw_name = name_value

                # Prices: only styled spans with real text are considered.
                for li_ctx in name_div_ctx.find_all('li'):
                    for span_ctx in li_ctx.find_all('span'):
                        price_value = span_ctx.get_text().strip()
                        if price_value == '' or price_value == ':':
                            continue
                        if 'style' not in span_ctx.attrs:
                            continue

                        # A 'text-decoration' style marks the list price;
                        # every other styled span is the sale price.  The
                        # previous `0 < find(...)` test missed a style that
                        # STARTS with 'text-decoration' (index 0).
                        if 'text-decoration' in span_ctx.attrs['style']:
                            product_data.crw_price = int(
                                __UTIL__.get_only_digit(price_value))
                        else:
                            product_data.crw_price_sale = int(
                                __UTIL__.get_only_digit(price_value))

            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #28
0
ファイル: dermadog.py プロジェクト: hiadone/python_crawl
	def set_product_data(self, page_url, soup, product_ctx):
		"""Build a ProductData from one listing item and submit it.

		Image, link, goods code and category are read from the
		``<dt class="thumb">`` cell, name and brand from
		``<dd class="prd-info">``, prices and sold-out state from
		``<p class="price-info">``.  When a product link was found the
		record is passed to ``set_product_data_sub`` and
		``process_product_api``.

		Args:
			page_url: Not used by this implementation.
			soup: Not used by this implementation.
			product_ctx: Node for a single product, searched with
				``find_all``.

		Returns:
			Always True; exceptions are logged via ``__LOG__.Error`` and
			swallowed.
		"""
		try:
			item = ProductData()
			post_url = ''

			# Thumbnail cell:
			# <dt class="thumb"><a href="/shop/shopdetail.html?branduid=...">
			#   <img class="MS_prod_img_m" src="/shopimages/.../xxx.jpg?1591754112"></a></dt>
			for thumb in product_ctx.find_all('dt', class_='thumb'):
				# First <img> with a usable src becomes the product image;
				# the cache-busting query string is dropped.
				for img in thumb.find_all('img'):
					src = ''
					if 'src' in img.attrs:
						src = img.attrs['src'].strip().split('?')[0].strip()

					if src != '':
						full_src = self.set_img_url(self.BASIC_IMAGE_URL, src)
						item.product_img = self.get_hangul_url_convert(full_src)
						break

				# The first <a> in the cell is the product link.
				anchor = thumb.find('a')
				if anchor is not None and 'href' in anchor.attrs:
					post_url = self.get_crw_post_url(anchor, 'href')
					if post_url != '':
						self.get_crw_goods_code(item, post_url)
						self.get_category_value(item, post_url)

			# Name and brand:
			# <dd class="prd-info"><ul>
			#   <li class="prd-brand">...</li>
			#   <li class="prd-name"><a href="...">product name</a></li>
			# </ul></dd>
			for info in product_ctx.find_all('dd', class_='prd-info'):
				name_li = info.find('li', class_='prd-name')
				if name_li is not None:
					item.crw_name = name_li.get_text().strip()

				brand_li = info.find('li', class_='prd-brand')
				if brand_li is not None:
					item.crw_brand1 = brand_li.get_text().strip()

			# Prices / sold-out:
			# <p class="price-info">
			#   <strike>10,000</strike><br>
			#   <span class="won">...</span><span class="price">9,000</span>
			# </p>
			# When sold out the paragraph only contains the text "Sold Out".
			for price_p in product_ctx.find_all('p', class_='price-info'):
				if 'Out' in price_p.get_text().strip():
					item.crw_is_soldout = 1

				list_price = price_p.find('strike')
				if list_price is not None:
					item.crw_price = int(
						__UTIL__.get_only_digit(list_price.get_text().strip()))

				sale_price = price_p.find('span', class_='price')
				if sale_price is not None:
					item.crw_price_sale = int(
						__UTIL__.get_only_digit(sale_price.get_text().strip()))

			if post_url != '':
				self.set_product_data_sub(item, post_url)
				self.process_product_api(item)

		except Exception as ex:
			__LOG__.Error('에러 : set_product_data')
			__LOG__.Error(ex)

		return True
コード例 #29
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Collect one product's fields and register it by URL.

        Category, image, sold-out state and name/URL are delegated to the
        ``set_product_*`` helpers; brand and sale price are parsed here from
        the "label : value" ``<li>`` rows of ``product_ctx``.

        Args:
            page_url: Passed to ``set_product_category_second``.
            soup: Passed to ``set_product_category_second``.
            product_ctx: Node for a single product, searched with
                ``find_all``.

        Returns:
            Always True; exceptions are logged via ``__LOG__.Error`` and
            swallowed.
        """
        try:
            item = ProductData()

            # Category, thumbnail (<div class="prdImg">) and sold-out state
            # come from shared helpers.
            self.set_product_category_second(page_url, item, soup)
            self.set_product_image_fourth(item, product_ctx)
            self.set_product_soldout_first(item, product_ctx)

            # Name and detail-page URL are read from <strong class="name">.
            post_url = self.set_product_name_url_second(
                item, product_ctx, 'strong', 'name')

            # Detail rows render as "<label> : <value>", e.g.
            #   <li><span class="title"><span>LABEL</span> :</span> <span>4,000 won</span></li>
            for row in product_ctx.find_all('li'):
                row_text = row.get_text().strip()
                fields = row_text.split(':')

                # Both branches need a ':' that is not the first character.
                if row_text.find(':') <= 0:
                    continue

                if '브랜드' in row_text:
                    # Brand row.
                    item.crw_brand1 = fields[1].strip()
                elif '판매가' in row_text:
                    # Sale-price row; drop any parenthesised note.
                    amount = fields[1].split('(')[0].strip()
                    item.crw_price_sale = int(
                        __UTIL__.get_only_digit(amount))

            if post_url != '':
                self.set_product_url_hash(item, post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
コード例 #30
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData record from one product element of a listing page.

        The record is filled from the page-level category breadcrumb and from
        the brand, link/image, name and price elements found inside
        ``product_ctx``. When a product URL was extracted, the record is passed
        to ``self.set_product_data_sub`` and ``self.process_product_api``.

        Args:
            page_url: URL of the listing page. Not read by this method; kept
                for interface compatibility with callers.
            soup: Parsed document of the whole page (used for the breadcrumb).
            product_ctx: Parsed element wrapping a single product.

        Returns:
            Always ``True``. Any exception raised while parsing is logged via
            ``__LOG__.Error`` and the product is skipped.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            ####################################
            # Category breadcrumb
            #
            # The text is split on '>'; the first segment is ignored and the
            # 2nd/3rd/4th segments become category 1/2/3.
            ####################################
            category_ctx_list = soup.select(
                'body > div.body_wrap > div.content_wrap > div.section_tit > div.close'
            )
            for category_ctx in category_ctx_list:
                segments = category_ctx.get_text().strip().split('>')
                for idx, segment in enumerate(segments, start=1):
                    category_name = segment.strip()
                    if idx == 2:
                        product_data.crw_category1 = category_name
                    elif idx == 3:
                        product_data.crw_category2 = category_name
                    elif idx == 4:
                        product_data.crw_category3 = category_name

            ####################################
            # Brand
            #
            # <div class="line_sub">BRAND TEXT</div>
            ####################################
            for brand_ctx in product_ctx.find_all('div', class_='line_sub'):
                product_data.crw_brand1 = brand_ctx.get_text().strip()

            ####################################
            # Product image / product link / product code
            #
            # <div class="picture"><a href="./product.html?pd_code=A010489&amp;event_type=..."><img src="http://.../A010489_2.jpg"></a></div>
            ####################################
            for picture_ctx in product_ctx.find_all('div', class_='picture'):
                product_link_ctx = picture_ctx.find('a')
                if product_link_ctx is None:
                    continue

                if 'href' in product_link_ctx.attrs:
                    href = product_link_ctx.attrs['href'].strip()
                    if href.startswith('http'):
                        tmp_product_link = href
                    else:
                        # Relative link: prefix it with the shop's base URL.
                        tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                                     href)

                    # Always start from the resolved link. Previously the URL
                    # was only assigned when a strip string was configured,
                    # which left it empty (or stale) otherwise.
                    crw_post_url = tmp_product_link
                    if self.C_PRODUCT_STRIP_STR != '':
                        crw_post_url = crw_post_url.replace(
                            self.C_PRODUCT_STRIP_STR, '')

                    # Drop the trailing event_type query parameter.
                    crw_post_url = crw_post_url.split(
                        '&event_type=')[0].strip()

                    # The product code is the pd_code query value. A link
                    # without '?pd_code=' raises IndexError here, which the
                    # handler below logs, so the product is skipped.
                    split_list = crw_post_url.split('?pd_code=')
                    product_data.crw_goods_code = split_list[1].strip().split(
                        '&')[0]

                for img_ctx in product_link_ctx.find_all('img'):
                    # Prefer 'data-original' over 'src' when both are present.
                    img_src = ''
                    if 'data-original' in img_ctx.attrs:
                        img_src = img_ctx.attrs['data-original'].strip()
                    elif 'src' in img_ctx.attrs:
                        img_src = img_ctx.attrs['src'].strip()

                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)

            ####################################
            # Product name
            #
            # <div class="name">
            # <div style="..."></div>
            # <a href="./product.html?pd_code=A010489&amp;event_type=...">
            # PRODUCT NAME</a>
            # </div>
            ####################################
            for name_div_ctx in product_ctx.find_all('div', class_='name'):
                name_link_ctx = name_div_ctx.find('a')
                if name_link_ctx is None:
                    continue

                crw_name = name_link_ctx.get_text().strip()
                product_data.crw_name = crw_name
                # Containment test so the sold-out marker is also recognised
                # when it is the very first thing in the name (index 0).
                if '[품절]' in crw_name:
                    product_data.crw_is_soldout = 1
                    product_data.crw_name = crw_name.replace('[품절]',
                                                             '').strip()

            ####################################
            # Price
            #
            # <div class="line_np">20,000원</div>
            # <div class="line_sp">
            # 12,000원
            # <span style="...">40%↓</span></div>
            ####################################
            for price_ctx in product_ctx.find_all('div', class_='line_np'):
                price_str = price_ctx.get_text().strip()
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(price_str))

            for sale_ctx in product_ctx.find_all('div', class_='line_sp'):
                price_str = sale_ctx.get_text().strip()
                discount_ctx = sale_ctx.find('span')
                discount_str = ''
                if discount_ctx is not None:
                    discount_str = discount_ctx.get_text().strip()

                if discount_str != '':
                    # Cut the text at the nested <span> content so its digits
                    # (e.g. a discount percentage) do not leak into the price.
                    price_str = price_str.split(discount_str)[0]

                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(price_str.strip()))

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            # Best-effort crawl: log the failure and skip this product.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True