def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product entry of a listing page and submit it.

    Fills a new ``ProductData`` with the thumbnail image, detail-page link,
    goods code, category, name, brand, prices and sold-out flag found in
    ``product_ctx``. When a detail-page URL was found the data is passed to
    ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page (not read by this parser).
        soup: Parsed listing page (not read by this parser).
        product_ctx: Parsed element holding a single product entry; it is
            queried with ``find`` / ``find_all``.

    Returns:
        Always ``True``. Exceptions are logged and not re-raised.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        ####################################
        # Product image, product link, goods code and category
        #
        # <dt class="thumb"><a href="/shop/shopdetail.html?branduid=111062&xcode=020&mcode=004&scode=&type=X&sort=regdate&cur_code=020&GfDT=bmx9W1w%3D"><img class="MS_prod_img_m" src="/shopimages/affetto/0200040000052.jpeg?1563325993"></a></dt>
        ####################################
        for img_div_ctx in product_ctx.find_all('dt', class_='thumb'):
            for img_ctx in img_div_ctx.find_all('img'):
                # Keep only the part of the src before the '?' query string.
                img_src = img_ctx.attrs.get('src', '').strip().split('?')[0].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)
                    break

            product_link_ctx = img_div_ctx.find('a')
            if product_link_ctx is not None and 'href' in product_link_ctx.attrs:
                crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
                if crw_post_url != '':
                    self.get_crw_goods_code(product_data, crw_post_url)
                    self.get_category_value(product_data, crw_post_url)

        ####################################
        # Product name and brand
        #
        # <li class="prd-name"><a href="/shop/shopdetail.html?branduid=111062&...">이글루 하우스 - M / L</a></li>
        ####################################
        for name_dd_ctx in product_ctx.find_all('dd', class_='prd-info'):
            name_ctx = name_dd_ctx.find('li', class_='prd-name')
            if name_ctx is not None:
                product_data.crw_name = name_ctx.get_text().strip()

            brand_ctx = name_dd_ctx.find('li', class_='prd-brand')
            if brand_ctx is not None:
                product_data.crw_brand1 = brand_ctx.get_text().strip()

        ####################################
        # Prices / sold-out state
        #
        # <li class="prd-price">
        #   <strike>95,000원</strike>
        #   <span class="price">85,500원</span>
        # </li>
        #
        # ---- when sold out ----
        # <li class="prd-price"> Sold Out </li>
        ####################################
        for price_li_ctx in product_ctx.find_all('li', class_='prd-price'):
            sell_ctx = price_li_ctx.find('span', class_='price')
            consumer_ctx = price_li_ctx.find('strike')

            # Substring test instead of `0 < find(...)`, which ignored a
            # marker located at the very start of the text.
            if 'Out' in price_li_ctx.get_text().strip():
                product_data.crw_is_soldout = 1

            if consumer_ctx is not None:
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(consumer_ctx.get_text().strip()))

            if sell_ctx is not None:
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

        # Entries without a detail-page URL are not submitted.
        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        # Best effort: log the failure and let the caller continue.
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, img_ctx, name_ctx, price_ctx):
    """Parse one product entry given as three separate elements and submit it.

    Fills a new ``ProductData`` with the image from ``img_ctx``, the name and
    detail-page link from ``name_ctx`` and the prices / sold-out flag from
    ``price_ctx``. When a detail-page URL was found the data is passed to
    ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page (not read by this parser).
        soup: Parsed listing page (not read by this parser).
        img_ctx: Element containing the product ``<img>`` tag(s).
        name_ctx: Element containing the product name and its ``<a>`` link.
        price_ctx: Element containing the price / sold-out markup.

    Returns:
        Always ``True``. Exceptions are logged and not re-raised.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        ####################################
        # Product image
        #
        # <a href="/shop/shopdetail.html?branduid=624477&..."><img class="MS_prod_img_s" src="/shopimages/lovespet/0320020000533.gif?1590117644" alt=""></a>
        ####################################
        # The loop variable no longer reuses the name of the `img_ctx`
        # parameter, which the original silently overwrote.
        for thumb_ctx in img_ctx.find_all('img'):
            # Keep only the part of the src before the '?' query string.
            img_src = thumb_ctx.attrs.get('src', '').strip().split('?')[0].strip()
            if img_src != '':
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                product_data.product_img = self.get_hangul_url_convert(
                    img_link)
                break

        ####################################
        # Product name, link, goods code and category
        #
        # <strong class="name"><a href="/shop/shopdetail.html?branduid=624477&...">도기스타 쿨하네스 ( S ~ XL )</a></strong>
        ####################################
        product_data.crw_name = name_ctx.get_text().strip()

        product_link_ctx = name_ctx.find('a')
        if product_link_ctx is not None and 'href' in product_link_ctx.attrs:
            crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
            if crw_post_url != '':
                self.get_crw_goods_code(product_data, crw_post_url)
                self.get_category_value(product_data, crw_post_url)

        ####################################
        # Prices / sold-out state
        #
        # <li class="price">
        #   <p class="price02"><strike>₩24,000</strike></p>
        #   <p class="price03">₩24,000</p>
        # </li>
        #
        # ---- when sold out ----
        # <li class="price">
        #   <div class="sold">[품절상품]</div>
        # </li>
        ####################################
        sell_ctx = price_ctx.find('p', class_='price03')
        consumer_ctx = price_ctx.find('p', class_='price02')
        soldout_ctx = price_ctx.find('div', class_='sold')

        if soldout_ctx is not None:
            product_data.crw_is_soldout = 1

        if consumer_ctx is not None:
            product_data.crw_price = int(
                __UTIL__.get_only_digit(consumer_ctx.get_text().strip()))

        if sell_ctx is not None:
            product_data.crw_price_sale = int(
                __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

        # Entries without a detail-page URL are not submitted.
        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        # Best effort: log the failure and let the caller continue.
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product entry of a listing page and submit it.

    Fills a new ``ProductData`` with the thumbnail image, detail-page link,
    goods code, category, name, brand (taken from a leading ``[brand]``
    prefix of the name), sale price and sold-out flag found in
    ``product_ctx``. When a detail-page URL was found the data is passed to
    ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page (not read by this parser).
        soup: Parsed listing page (not read by this parser).
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``. Exceptions are logged and not re-raised.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        ####################################
        # Product image, product link, goods code and category
        #
        # <dt class="thumb"><a href="/shop/shopdetail.html?branduid=803964&xcode=035&..."><img class="MS_prod_img_s" src="/shopimages/aromnaom/0330290001733.jpg?1581494094"></a></dt>
        ####################################
        for img_div_ctx in product_ctx.find_all('dt', class_='thumb'):
            product_link_list = img_div_ctx.find_all('a')

            for img_ctx in img_div_ctx.find_all('img'):
                # Keep only the part of the src before the '?' query string.
                img_src = img_ctx.attrs.get('src', '').strip().split('?')[0].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)
                    break

            # Use the first anchor that yields a non-empty detail-page URL.
            for product_link_ctx in product_link_list:
                if 'href' not in product_link_ctx.attrs:
                    continue
                crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
                if crw_post_url != '':
                    self.get_crw_goods_code(product_data, crw_post_url)
                    self.get_category_value(product_data, crw_post_url)
                    break

        ####################################
        # Product name and brand
        #
        # <li class="prd-name">[애니케어] 면역을 위한 멀티파우더 <span class="MK-product-icons"></span></li>
        ####################################
        for name_li_ctx in product_ctx.find_all('li', class_='prd-name'):
            product_data.crw_name = name_li_ctx.get_text().strip()

            # The brand is written in brackets in front of the name,
            # e.g. "[스텔라&츄이] 츄이스 치킨 디너패티".
            if product_data.crw_name.startswith('['):
                product_data.crw_brand1 = product_data.crw_name.split(
                    ']')[0][1:].strip()

        ####################################
        # Price / sold-out state
        #
        # <li class="prd-price">74,800원</li>
        #
        # ---- when sold out ----
        # <li class="prd-price">
        #   <span class="fc-red">품절</span>
        # </li>
        ####################################
        for ul_ctx in product_ctx.find_all('ul'):
            sell_ctx = ul_ctx.find('li', class_='prd-price')
            if sell_ctx is not None:
                # NOTE(review): for a sold-out entry this text holds no
                # digits; confirm get_only_digit returns something int()
                # accepts in that case.
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

            soldout_ctx = ul_ctx.find('span', class_='fc-red')
            if soldout_ctx is not None:
                product_data.crw_is_soldout = 1

        # Entries without a detail-page URL are not submitted.
        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        # Best effort: log the failure and let the caller continue.
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data_second(self, page_url, soup):
    """Parse a product detail page and register it in the product URL hash.

    The goods code is taken from the ``product_no`` query parameter of
    ``page_url``; categories 2 and 3 come from the ``|``-separated value
    stored in ``self.PAGE_URL_HASH[page_url]``. Image, name, sold-out flag
    and prices are read from ``soup``. The result is passed to
    ``set_product_url_hash``.

    Args:
        page_url: URL of the detail page; expected to contain
            ``?product_no=``.
        soup: Parsed detail page.

    Returns:
        Always ``True``. Exceptions (including a missing ``product_no`` or a
        missing ``PAGE_URL_HASH`` entry) are logged and not re-raised.
    """
    try:
        product_data = ProductData()
        crw_post_url = page_url

        # Goods code: text between '?product_no=' and the next '&'.
        product_data.crw_goods_code = crw_post_url.split(
            '?product_no=')[1].strip().split('&')[0].strip()

        # Product category
        product_data.crw_category1 = 'PRODUCT'
        category_parts = self.PAGE_URL_HASH[page_url].split('|')
        for idx, split_data in enumerate(category_parts, start=1):
            if idx == 1:
                product_data.crw_category2 = split_data.strip()
            elif idx == 2:
                product_data.crw_category3 = split_data.strip()

        # Product image: the first usable img.BigImage wins.
        for img_ctx in soup.find_all('img', class_='BigImage'):
            img_src = img_ctx.attrs.get('src', '').strip()
            if img_src != '':
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                if product_data.product_img == '':
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

        # Product name: the first p.prd_model wins.
        for p_ctx in soup.find_all('p', class_='prd_model'):
            if product_data.crw_name == '':
                product_data.crw_name = p_ctx.get_text().strip()

        # Sold-out state: the #btnReserve element counts as sold out unless
        # its first class is 'displaynone'.
        # NOTE(review): an element without any class attribute is treated as
        # sold out as well - confirm against the original indentation.
        sold_out_ctx = soup.find('span', {'id': 'btnReserve'})
        if sold_out_ctx is not None:
            if 'class' in sold_out_ctx.attrs:
                if sold_out_ctx.attrs['class'][0] != 'displaynone':
                    product_data.crw_is_soldout = 1
            else:
                product_data.crw_is_soldout = 1

        # Prices
        for price_ctx in soup.find_all('div', class_='info_price'):
            sell_ctx = price_ctx.find('span', class_='sell')
            customer_ctx = price_ctx.find('span', class_='customer')

            if sell_ctx is not None:
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

            if customer_ctx is not None:
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(customer_ctx.get_text().strip()))

        if crw_post_url != '':
            self.set_product_url_hash(product_data, crw_post_url)

    except Exception as ex:
        # Best effort: log the failure and let the caller continue.
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product entry of a listing page and submit it.

    Fills a new ``ProductData`` with the thumbnail image, detail-page link,
    goods code, category, name, brand, prices and sold-out flag found in
    ``product_ctx``. A ``(품절)`` marker in the name sets the sold-out flag
    and is removed from the stored name. When a detail-page URL was found
    the data is passed to ``set_product_data_sub`` and
    ``process_product_api``.

    Args:
        page_url: URL of the listing page (not read by this parser).
        soup: Parsed listing page (not read by this parser).
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``. Exceptions are logged and not re-raised.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        ####################################
        # Product image, product link, goods code and category
        #
        # <dd class="prd-img"><img class="MS_prod_img_s" src="/shopimages/ecofoam/0450010000053.jpg?1527467204" alt="상품 섬네일" title="상품 섬네일"></dd>
        #
        # <dl class="item grid-item opa70" style="position: absolute; left: 0px; top: 0px;">
        # <a href="/shop/shopdetail.html?branduid=841206&xcode=046&...">
        ####################################
        for img_div_ctx in product_ctx.find_all('dd', class_='prd-img'):
            for img_ctx in img_div_ctx.find_all('img'):
                # Keep only the part of the src before the '?' query string.
                img_src = img_ctx.attrs.get('src', '').strip().split('?')[0].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)
                    break

            # The anchor is not inside the image <dd>, so it is looked up on
            # the whole product element.
            product_link_ctx = product_ctx.find('a')
            if product_link_ctx is not None and 'href' in product_link_ctx.attrs:
                crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
                if crw_post_url != '':
                    self.get_crw_goods_code(product_data, crw_post_url)
                    self.get_category_value(product_data, crw_post_url)

        ####################################
        # Product name and brand
        #
        # <span class="prd-name ft_eb">도그자리 플랫<br></span>
        #
        # --- name when sold out ---
        # <span class="prd-name ft_eb">맘편한매트 소프트W<br>8세트(품절)</span>
        #
        # --- brand ---
        # <span class="prd-brand">도그자리</span>
        ####################################
        name_span_ctx = product_ctx.find('span', class_='prd-name ft_eb')
        if name_span_ctx is not None:
            crw_name = name_span_ctx.get_text().strip()
            # Substring test instead of `0 < find(...)`, which ignored a
            # marker located at the very start of the name.
            if '(품절)' in crw_name:
                product_data.crw_is_soldout = 1
                crw_name = crw_name.replace('(품절)', '').strip()
            product_data.crw_name = crw_name

        brand_span_ctx = product_ctx.find('span', class_='prd-brand')
        if brand_span_ctx is not None:
            product_data.crw_brand1 = brand_span_ctx.get_text().strip()

        ####################################
        # Prices
        #
        # <span class="prd-price-discount"><del>75,000</del></span>
        # <span class="prd-discount ft_eb">52,000 원</span>
        ####################################
        for div_ctx in product_ctx.find_all('div', class_='prd-sub'):
            sell_ctx = div_ctx.find('span', class_='prd-discount ft_eb')
            consumer_ctx = div_ctx.find('span', class_='prd-price-discount')

            if consumer_ctx is not None:
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(consumer_ctx.get_text().strip()))

            if sell_ctx is not None:
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

        # Entries without a detail-page URL are not submitted.
        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        # Best effort: log the failure and let the caller continue.
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product entry of a listing page and submit it.

    Categories are read from the breadcrumb text selected on ``soup``
    (2nd to 4th ``>``-separated parts). Brand, image, detail-page link,
    goods code (``pd_code`` query parameter), name, sold-out flag and prices
    are read from ``product_ctx``. When a detail-page URL was found the data
    is passed to ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page (not read by this parser).
        soup: Parsed listing page; used for the category breadcrumb.
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``. Exceptions are logged and not re-raised.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        self.reset_product_category(product_data)

        category_ctx_list = soup.select(
            'body > div.body_wrap > div.content_wrap > div.section_tit > div.close'
        )
        for category_ctx in category_ctx_list:
            parts = category_ctx.get_text().strip().split('>')
            # The first breadcrumb part is skipped; parts 2-4 are the
            # category levels.
            for idx, split_data in enumerate(parts, start=1):
                category_name = split_data.strip()
                if idx == 2:
                    product_data.crw_category1 = category_name
                elif idx == 3:
                    product_data.crw_category2 = category_name
                elif idx == 4:
                    product_data.crw_category3 = category_name

        ####################################
        # Brand
        #
        # <div class="line_sub"> 한국산 </div>
        ####################################
        for div_ctx in product_ctx.find_all('div', class_='line_sub'):
            product_data.crw_brand1 = div_ctx.get_text().strip()

        ####################################
        # Product image / product link / goods code
        #
        # <div class="picture"><a href="./product.html?pd_code=A010489&event_type=%C3%CA%C6%AF%B0%A1"><img src="http://queenpuppy.co.kr/shop/pd_img/A01/489/A010489_2.jpg"></a></div>
        ####################################
        for picture_ctx in product_ctx.find_all('div', class_='picture'):
            product_link_ctx = picture_ctx.find('a')
            if product_link_ctx is None or 'href' not in product_link_ctx.attrs:
                continue

            tmp_product_link = product_link_ctx.attrs['href'].strip()
            if not tmp_product_link.startswith('http'):
                tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                             tmp_product_link)

            # Start from the full link so the URL is also set when no strip
            # string is configured; previously crw_post_url stayed '' in
            # that case and the pd_code lookup below failed.
            crw_post_url = tmp_product_link
            if self.C_PRODUCT_STRIP_STR != '':
                crw_post_url = tmp_product_link.replace(
                    self.C_PRODUCT_STRIP_STR, '')

            # Drop the event_type parameter, then read pd_code.
            crw_post_url = crw_post_url.split('&event_type=')[0].strip()
            product_data.crw_goods_code = crw_post_url.split(
                '?pd_code=')[1].strip().split('&')[0]

            for img_ctx in product_link_ctx.find_all('img'):
                img_src = ''
                if 'data-original' in img_ctx.attrs:
                    img_src = img_ctx.attrs['data-original'].strip()
                elif 'src' in img_ctx.attrs:
                    img_src = img_ctx.attrs['src'].strip()

                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

        ####################################
        # Product name
        #
        # <div class="name">
        #   <div style="color:#fd705f; font-weight: bold; ..."></div>
        #   <a href="./product.html?pd_code=A010489&event_type=%C3%CA%C6%AF%B0%A1">
        #     건국유업 프로젝트 닥터케이 펫밀크 10개입 </a>
        # </div>
        ####################################
        for name_div_ctx in product_ctx.find_all('div', class_='name'):
            name_link_ctx = name_div_ctx.find('a')
            if name_link_ctx is None:
                continue
            crw_name = name_link_ctx.get_text().strip()
            product_data.crw_name = crw_name
            # Substring test instead of `0 < find(...)`, which ignored a
            # '[품절]' marker written as a prefix of the name.
            if '[품절]' in crw_name:
                product_data.crw_is_soldout = 1
                product_data.crw_name = crw_name.replace('[품절]', '').strip()

        ####################################
        # Prices
        #
        # <div class="line_np">20,000원</div>
        # <div class="line_sp">
        #   12,000원
        #   <span style="font-size: 0.8em; ...">40%↓</span> </div>
        ####################################
        for div_ctx in product_ctx.find_all('div', class_='line_np'):
            product_data.crw_price = int(
                __UTIL__.get_only_digit(div_ctx.get_text().strip()))

        for div_ctx in product_ctx.find_all('div', class_='line_sp'):
            price_str = div_ctx.get_text().strip()
            rate_ctx = div_ctx.find('span')
            rate_str = ''
            if rate_ctx is not None:
                rate_str = rate_ctx.get_text().strip()

            if rate_str == '':
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(price_str.strip()))
            else:
                # Cut the discount-rate text off so its digits are not
                # mixed into the price.
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(
                        price_str.split(rate_str)[0].strip()))

        # Entries without a detail-page URL are not submitted.
        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        # Best effort: log the failure and let the caller continue.
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product entry of a listing page and submit it.

    Fills a new ``ProductData`` with the thumbnail image, detail-page link,
    goods code, category, name, brand (taken from a leading ``[brand]``
    prefix of the name), prices and sold-out flag found in ``product_ctx``.
    When a detail-page URL was found the data is passed to
    ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page (not read by this parser).
        soup: Parsed listing page (not read by this parser).
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``. Exceptions are logged and not re-raised.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        ####################################
        # Product image, product link, goods code and category
        #
        # <div class="thumb salebox"> <a href="/shop/shopdetail.html?branduid=3356611&xcode=002&..."><img class="MS_prod_img_m" src="/shopimages/petnoriter/0020050000022.jpg?1590140914" alt="상품 섬네일"></a>
        #   <input type="hidden" name="custom_price" value="49900">
        #   <input type="hidden" name="product_price" value="28900">
        #   <span class="sale_text" style="display: block;">42%</span> </div>
        ####################################
        for img_div_ctx in product_ctx.find_all('div', class_='thumb salebox'):
            product_link_list = img_div_ctx.find_all('a')

            for img_ctx in img_div_ctx.find_all('img'):
                # Keep only the part of the src before the '?' query string.
                img_src = img_ctx.attrs.get('src', '').strip().split('?')[0].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)
                    break

            # Use the first anchor that yields a non-empty detail-page URL.
            for product_link_ctx in product_link_list:
                if 'href' not in product_link_ctx.attrs:
                    continue
                crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
                if crw_post_url != '':
                    self.get_crw_goods_code(product_data, crw_post_url)
                    self.get_category_value(product_data, crw_post_url)
                    break

        ####################################
        # Product name and brand
        #
        # <li class="dsc">논슬립 항균 배변 매트 원형/사각</li>
        ####################################
        for name_li_ctx in product_ctx.find_all('li', class_='dsc'):
            product_data.crw_name = name_li_ctx.get_text().strip()

            # The brand is written in brackets in front of the name,
            # e.g. "[스텔라&츄이] 츄이스 치킨 디너패티".
            if product_data.crw_name.startswith('['):
                product_data.crw_brand1 = product_data.crw_name.split(
                    ']')[0][1:].strip()

        ####################################
        # Prices / sold-out state
        #
        # <ul class="info">
        #   <li class="dsc">논슬립 항균 배변 매트 원형/사각</li>
        #   <li class="subname">배변걱정 이제그만~</li>
        #   <li class="price">28,900원</li>
        #   <li class="consumer">49,900원</li>
        #   <li class="icon"><span class="MK-product-icons"></span></li>
        #   <li class="closeup"><a class="btn-overlay-show" href="javascript:viewdetail('002005000002', '1', '');"><img src="/images/common/view_shopdetail2.gif" alt="미리보기"></a></li>
        #   <li class="cboth icon_option"></li>
        # </ul>
        ####################################
        for ul_ctx in product_ctx.find_all('ul'):
            sell_ctx = ul_ctx.find('li', class_='price')
            consumer_ctx = ul_ctx.find('li', class_='consumer')
            soldout_ctx = ul_ctx.find('li', class_='soldout')

            if soldout_ctx is not None:
                product_data.crw_is_soldout = 1

            if consumer_ctx is not None:
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(consumer_ctx.get_text().strip()))

            if sell_ctx is not None:
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

        # Entries without a detail-page URL are not submitted.
        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        # Best effort: log the failure and let the caller continue.
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product entry of a listing page and submit it.

    Categories come from ``self.PAGE_URL_HASH[page_url]`` (``|``-separated)
    when ``self.C_DETAIL_CATEGORY_VALUE`` is non-blank, otherwise from
    ``set_godo_category_data``. Brand, image, sold-out flag, name,
    detail-page link, goods code (``goodsNo`` query parameter) and prices
    are read from ``product_ctx``. When a detail-page URL was found the data
    is passed to ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page; key into ``PAGE_URL_HASH``.
        soup: Parsed listing page; forwarded to ``set_godo_category_data``.
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``. Exceptions are logged and not re-raised.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        self.reset_product_category(product_data)

        if self.C_DETAIL_CATEGORY_VALUE.strip() != '':
            category_parts = self.PAGE_URL_HASH[page_url].split('|')
            for idx, split_data in enumerate(category_parts, start=1):
                if idx == 1:
                    product_data.crw_category1 = split_data
                elif idx == 2:
                    product_data.crw_category2 = split_data
                elif idx == 3:
                    product_data.crw_category3 = split_data
        else:
            self.set_godo_category_data(soup, product_data)

        ####################################
        # Brand
        #
        # <span class="item_brand">
        #   <strong>[지그니쳐]</strong>
        # </span>
        ####################################
        for brand_ctx in product_ctx.find_all('span', class_='item_brand'):
            brand_name = brand_ctx.get_text().strip()
            if brand_name != '':
                product_data.crw_brand1 = brand_name.replace(
                    '[', '').replace(']', '').strip()

        ####################################
        # Product image
        #
        # <div class="item_photo_box">
        #   <a href="../goods/goods_view.php?goodsNo=1000000896&mtn=%5E%7C%5E%5E%7C%5E">
        #     <img data-original="/data/goods/19/10/43/1000000896/1000000896_add2_085.jpg" width="250" class="middle gd_image_lazy" src="/data/goods/19/10/43/1000000896/1000000896_add2_085.jpg" style="display: inline;">
        #   </a>
        # </div>
        ####################################
        for img_div_ctx in product_ctx.find_all('div', class_='item_photo_box'):
            img_ctx = img_div_ctx.find('img')
            if img_ctx is None:
                continue

            img_src = ''
            if 'data-original' in img_ctx.attrs:
                img_src = img_ctx.attrs['data-original'].strip()
            elif 'src' in img_ctx.attrs:
                img_src = img_ctx.attrs['src'].strip()

            if img_src != '':
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                # The first image found is kept.
                if product_data.product_img == '':
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

        ####################################
        # Sold-out state (two markup variants)
        #
        # <div class="item_icon_box">
        #   <img src="/data/icon/goods_icon/당일출고아이콘.gif" alt="벌써배송상품" title="벌써배송상품" class="middle">
        #   <img src="/data/icon/goods_icon/icon_soldout.gif">
        # </div>
        #
        # ------------------------------------
        #
        # <div class="item_photo_box">
        #   <a href="../goods/goods_view.php?goodsNo=1000001614">
        #     <img data-original="..." class="middle gd_image_lazy" src="...">
        #     <strong class="item_soldout_bg" style="background-image:url(/data/icon/goods_icon/soldout-1.png);">SOLD OUT</strong>
        #   </a>
        # </div>
        ####################################
        # Variant 1: an icon whose src contains 'soldout'. Substring test
        # instead of `0 < find(...)`, which ignored a match at index 0.
        for icon_box_ctx in product_ctx.find_all('div', class_='item_icon_box'):
            for icon_ctx in icon_box_ctx.find_all('img'):
                if 'soldout' in icon_ctx.attrs.get('src', ''):
                    product_data.crw_is_soldout = 1

        # Variant 2: a strong.item_soldout_bg overlay in the photo box.
        if product_data.crw_is_soldout != 1:
            for photo_box_ctx in product_ctx.find_all(
                    'div', class_='item_photo_box'):
                if photo_box_ctx.find_all('strong', class_='item_soldout_bg'):
                    product_data.crw_is_soldout = 1

        ####################################
        # Product link, name and goods code
        #
        # <div class="item_tit_box">
        #   <a href="../goods/goods_view.php?goodsNo=1000001614">
        #     <strong class="item_name">빅독 리틀 바이트 사료 - 악어고기 100g</strong>
        #   </a>
        # </div>
        ####################################
        for title_box_ctx in product_ctx.find_all('div', class_='item_tit_box'):
            for product_link_ctx in title_box_ctx.find_all('a'):
                if 'href' not in product_link_ctx.attrs:
                    continue

                for strong_ctx in product_link_ctx.find_all('strong'):
                    product_data.crw_name = strong_ctx.get_text().strip()

                tmp_product_link = product_link_ctx.attrs['href'].strip()
                if not tmp_product_link.startswith('http'):
                    tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                                 tmp_product_link)

                crw_post_url = tmp_product_link
                if self.C_PRODUCT_STRIP_STR != '':
                    crw_post_url = tmp_product_link.replace(
                        self.C_PRODUCT_STRIP_STR, '')

                # NOTE(review): everything after '?goodsNo=' becomes the
                # goods code, including any further '&...' parameters that
                # C_PRODUCT_STRIP_STR did not remove - confirm intended.
                product_data.crw_goods_code = crw_post_url.split(
                    '?goodsNo=')[1].strip()

        ####################################
        # Prices
        #
        # <div class="item_money_box">
        #   <strong class="item_price">
        #     <span>23,000원 </span>
        #   </strong>
        # </div>
        ####################################
        for money_box_ctx in product_ctx.find_all('div', class_='item_money_box'):
            del_ctx = money_box_ctx.find('del')
            strong_ctx = money_box_ctx.find('strong', class_='item_price')

            if del_ctx is not None:
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(del_ctx.get_text().strip()))

            if strong_ctx is not None:
                # During a time sale extra text follows the price on later
                # lines; only the first line is parsed.
                price_lines = strong_ctx.get_text().strip().split('\n')
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(price_lines[0].strip()))

        # Entries without a detail-page URL are not submitted.
        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        # Best effort: log the failure and let the caller continue.
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product entry of a listing page and submit it.

    Categories are read from the 2nd to 4th ``<span>`` of the
    ``#navigation-bar`` element in ``soup``; the brand is set to category 1.
    Image, goods code (first argument of the ``goHref(...)`` onclick call),
    name, sold-out flag and prices are read from ``product_ctx``. The
    detail-page URL is built from a fixed base URL plus the goods code. When
    that URL was built the data is passed to ``set_product_data_sub`` and
    ``process_product_api``.

    Args:
        page_url: URL of the listing page (not read by this parser).
        soup: Parsed listing page; used for the navigation bar.
        product_ctx: Parsed element holding a single product entry; its
            ``onclick`` attribute carries the goods code.

    Returns:
        Always ``True``. Exceptions are logged and not re-raised.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        self.reset_product_category(product_data)

        location_ctx = soup.find('div', {'id': 'navigation-bar'})
        if location_ctx is not None:
            # The first span is skipped; spans 2-4 are the category levels.
            for idx, span_ctx in enumerate(location_ctx.find_all('span'),
                                           start=1):
                if idx == 2:
                    product_data.crw_category1 = span_ctx.get_text().strip()
                elif idx == 3:
                    product_data.crw_category2 = span_ctx.get_text().strip()
                elif idx == 4:
                    product_data.crw_category3 = span_ctx.get_text().strip()

        ####################################
        # Brand: same value as category 1
        ####################################
        product_data.crw_brand1 = product_data.crw_category1

        ####################################
        # Product image
        #
        # <img class="item-image" src="/_vir0001/product_img/P1449_20200421AM94623_2.jpg" alt="img1">
        ####################################
        for img_ctx in product_ctx.find_all('img', class_='item-image'):
            img_src = ''
            if 'data-original' in img_ctx.attrs:
                img_src = img_ctx.attrs['data-original'].strip()
            elif 'src' in img_ctx.attrs:
                img_src = img_ctx.attrs['src'].strip()

            if img_src != '':
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                product_data.product_img = self.get_hangul_url_convert(
                    img_link)

        ####################################
        # Product link and goods code
        #
        # <div class="item" style="cursor:pointer" onclick="goHref(1449,'P1449_20200421AM94623_2.jpg')">
        ####################################
        if 'onclick' in product_ctx.attrs:
            onclick_str = product_ctx.attrs['onclick'].strip()
            # First argument of goHref(...) is the goods code.
            product_data.crw_goods_code = onclick_str.split(
                'goHref(')[1].split(',')[0].strip()
            crw_post_url = 'http://www.montraum.com/common/process/shopview.asp?thisCategory=22&pack_content_id=' + product_data.crw_goods_code

        ####################################
        # Product name / sold-out state
        #
        # <p class="item-description" id="iconID1449" name="iconID1449">데일리관리 세트<br> (돈모 브러쉬+플러쉬 콤)</p>
        #
        # ---------- when sold out ----------
        # <p class="item-description" id="iconID1095" name="iconID1095">트레이닝패드 L 120매 (30매 x 4개)<img src="/_vir0001/process/partImages/icon_soldout.gif" align="absmiddle"> ... </p>
        ####################################
        for name_ctx in product_ctx.find_all('p', class_='item-description'):
            product_data.crw_name = name_ctx.get_text().replace(
                '\n', ' ').strip()

            # Sold out when any icon inside the name has 'soldout' in its src.
            for soldout_img in name_ctx.find_all('img'):
                if 'soldout' in soldout_img.attrs.get('src', ''):
                    product_data.crw_is_soldout = 1

        ####################################
        # Prices
        #
        # <p class="item-price">
        #   <span class="list-price" id="ori_count1449" name="ori_count1449">74,000</span> <span class="now-price" id="promotion_ID1449" name="promotion_ID1449">40,900</span>
        # </p>
        ####################################
        for price_p_ctx in product_ctx.find_all('p', class_='item-price'):
            for span_ctx in price_p_ctx.find_all('span'):
                if 'class' not in span_ctx.attrs:
                    continue
                first_class = span_ctx.attrs['class'][0]
                if first_class == 'list-price':
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(span_ctx.get_text().strip()))
                elif first_class == 'now-price':
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(span_ctx.get_text().strip()))

        # Entries without a detail-page URL are not submitted.
        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        # Best effort: log the failure and let the caller continue.
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product entry of a listing page and submit it.

    Categories are filled by ``get_category_value(product_data, page_url,
    soup)``. Image, sold-out flag, name, detail-page link, goods code
    (``idx`` query parameter) and prices are read from ``product_ctx``.
    When a detail-page URL was found the data is passed to
    ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page; forwarded to
            ``get_category_value``.
        soup: Parsed listing page; forwarded to ``get_category_value``.
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``. Exceptions are logged and not re-raised.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        self.reset_product_category(product_data)

        ####################################
        # Product category
        ####################################
        self.get_category_value(product_data, page_url, soup)

        ####################################
        # Product image
        #
        # <img data-prodcode="s20200603cfcd165650db6" alt="" src="https://cdn.imweb.me/thumbnail/20200603/69b4e17496c01.jpg" class="_org_img org_img _lazy_img" data-original="https://cdn.imweb.me/thumbnail/20200603/69b4e17496c01.jpg" data-src="https://cdn.imweb.me/thumbnail/20200603/69b4e17496c01.jpg" style="display: inline;">
        #
        # class_='_org_img org_img _lazy_img'
        # class_='_org_img org_img owl-lazy'
        ####################################
        for img_ctx in product_ctx.find_all('img'):
            class_name_list = img_ctx.attrs.get('class', [])
            # Only images carrying at least three classes, the first two
            # being '_org_img' and 'org_img', are product images.
            if len(class_name_list) <= 2:
                continue
            if class_name_list[0] != '_org_img' or class_name_list[1] != 'org_img':
                continue

            # First non-empty attribute wins, in this order.
            img_src = ''
            for attr_name in ('data-original', 'data-src', 'src'):
                img_src = img_ctx.attrs.get(attr_name, '').strip()
                if img_src != '':
                    break

            if img_src != '':
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                product_data.product_img = self.get_hangul_url_convert(
                    img_link)

        ####################################
        # Sold-out state
        #
        # <div class="ns-icon clearfix">
        #   <div class="prod_icon sale">SALE</div><div class="prod_icon sold_out">SOLDOUT</div> </div>
        ####################################
        if product_ctx.find_all('div', class_='prod_icon sold_out'):
            product_data.crw_is_soldout = 1

        ####################################
        # Product link, name and goods code
        #
        # <div class="item-detail" style="">
        #   <div class="item-pay">
        #     <h2 style="display: ">
        #       <a class="_fade_link" href="/shop/?idx=1185">어반비스트 훈련용 코만도백</a>
        #     </h2>
        #     <div class="item-pay-detail">
        #       <p class="sale_pay body_font_color_50" style="">78,000원</p> <p class="pay" style=";"> 58,500원 </p>
        #     </div>
        #     <div class="ns-icon clearfix">
        #       <div class="prod_icon sale">SALE</div><div class="prod_icon sold_out">SOLDOUT</div> </div>
        #   </div>
        #   <div class="item-summary holder">
        #     <p>반려견의 산책과 훈련을 위한 코만도백</p> <a class="item-summary-link _fade_link" href="/shop/?idx=1185"><span class="sr-only">상품 요약설명</span></a>
        #   </div>
        #   <div class="item-icon">
        #     <span><i class="icon-bubble"></i> 0</span>
        #   </div>
        # </div>
        ####################################
        for name_div_ctx in product_ctx.find_all('div', class_='item-detail'):
            for h2_ctx in name_div_ctx.find_all('h2'):
                product_link_ctx = name_div_ctx.find('a', class_='_fade_link')
                if product_link_ctx is None or 'href' not in product_link_ctx.attrs:
                    continue

                product_data.crw_name = h2_ctx.get_text().strip()
                crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
                if crw_post_url != '':
                    # Goods code: text between '?idx=' and the next '&'.
                    product_data.crw_goods_code = crw_post_url.split(
                        '?idx=')[1].strip().split('&')[0].strip()

        ####################################
        # Prices
        #
        # <div class="item-pay-detail">
        #   <p class="sale_pay body_font_color_50" style="">78,000원</p> <p class="pay" style=";"> 58,500원 </p>
        # </div>
        ####################################
        for price_ctx in product_ctx.find_all('div', class_='item-pay-detail'):
            # Read the <p> tags of this price block. The original searched
            # `name_div_ctx`, a variable left over from the loop above,
            # which is undefined when no item-detail div exists.
            for p_ctx in price_ctx.find_all('p'):
                if 'class' not in p_ctx.attrs:
                    continue
                first_class = p_ctx.attrs['class'][0]
                if first_class == 'sale_pay':
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(p_ctx.get_text().strip()))
                elif first_class == 'pay':
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(p_ctx.get_text().strip()))

        # Entries without a detail-page URL are not submitted.
        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        # Best effort: log the failure and let the caller continue.
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data_second(self, page_url, soup, product_ctx):
    """Extract one product from a listing entry and submit it.

    Reads the category, thumbnail image, name, detail URL, goods code,
    price and sold-out state from ``product_ctx``. When a detail URL was
    found and it is not yet a key of ``self.PRODUCT_URL_HASH``, the record
    is passed to ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page (not used by this method).
        soup: Parsed listing page, forwarded to
            ``set_godo_category_data_second``.
        product_ctx: Parsed element that holds a single product entry.

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__.Error``
        and swallowed, in which case the product is not submitted.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        self.reset_product_category(product_data)

        # Product category.
        self.set_godo_category_data_second(soup, product_data)

        # Brand extraction (span.item_brand) is intentionally not performed.

        # Product image, e.g.
        #   <div class="thumbnail">
        #     <a href="../goods/goods_view.php?goodsNo=1000000030">
        #       <img src="/data/goods/.../1000000030_main_072.jpg" class="middle"></a>
        #   </div>
        for img_div_ctx in product_ctx.find_all('div', class_='thumbnail'):
            img_ctx = img_div_ctx.find('img')
            if img_ctx is None:
                continue

            # A lazy-load attribute wins over the plain src.
            img_src = ''
            if 'data-original' in img_ctx.attrs:
                img_src = img_ctx.attrs['data-original'].strip()
            elif 'src' in img_ctx.attrs:
                img_src = img_ctx.attrs['src'].strip()

            if img_src != '':
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                # Keep only the first image that was found.
                if product_data.product_img == '':
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

        # Product link, name and goods code, e.g.
        #   <div class="txt">
        #     <a href="../goods/goods_view.php?goodsNo=1000000030">
        #       <strong>Pet Esthe Spa Mud Conditioner (3L)</strong></a>
        #   </div>
        for name_div_ctx in product_ctx.find_all('div', class_='txt'):
            for product_link_ctx in name_div_ctx.find_all('a'):
                if 'href' not in product_link_ctx.attrs:
                    continue

                for strong_ctx in product_link_ctx.find_all('strong'):
                    product_data.crw_name = strong_ctx.get_text().strip()

                tmp_product_link = product_link_ctx.attrs['href'].strip()
                if not tmp_product_link.startswith('http'):
                    tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                                 tmp_product_link)

                crw_post_url = tmp_product_link
                if self.C_PRODUCT_STRIP_STR != '':
                    crw_post_url = tmp_product_link.replace(
                        self.C_PRODUCT_STRIP_STR, '')

                # The goods code is the value of the goodsNo query parameter.
                # A link without it raises IndexError, which drops the item.
                split_list = crw_post_url.split('?goodsNo=')
                sub_split_list = split_list[1].split('&')
                product_data.crw_goods_code = sub_split_list[0].strip()

        # Price / sold-out state, e.g.
        #   <div class="price gd-default">
        #     <span class="cost"><strong>180,000</strong>원</span></div>
        # When sold out the span holds a label such as "일시품절" instead.
        for div_ctx in product_ctx.find_all('div'):
            class_name_list = div_ctx.attrs.get('class')
            if not class_name_list or class_name_list[0] != 'price':
                continue

            cost_ctx = div_ctx.find('span', class_='cost')
            if cost_ctx is None:
                continue

            cost_value = cost_ctx.get_text().strip()

            # Check the sold-out label first so that a label without any
            # digit cannot abort the item before the flag is set.
            if '품절' in cost_value:
                product_data.crw_is_soldout = 1

            # NOTE(review): assumes get_only_digit returns '' when the text
            # holds no digit -- confirm against the helper.
            digits = __UTIL__.get_only_digit(cost_value)
            if digits != '':
                product_data.crw_price = int(digits)

        if crw_post_url != '':
            # Skip products whose detail URL has already been registered.
            if self.PRODUCT_URL_HASH.get(crw_post_url, -1) == -1:
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data_second')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Extract one product from a listing entry and submit it.

    Reads the category breadcrumb from ``soup`` and the detail URL, goods
    code, image, name, prices and sold-out state from ``product_ctx``.
    When a detail URL was found, the record is passed to
    ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page (not used by this method).
        soup: Parsed listing page that contains the category breadcrumb.
        product_ctx: Parsed element that holds a single product entry.

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__.Error``
        and swallowed, in which case the product is not submitted.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        self.reset_product_category(product_data)

        # Product category: the 2nd, 3rd and 4th breadcrumb links become
        # category levels 1, 2 and 3; the first link is skipped.
        for div_ctx in soup.find_all('div', class_='category_depth clearbox'):
            idx = 0
            for li_ctx in div_ctx.find_all('li', class_='item'):
                category_link = li_ctx.find('a')
                if category_link is None:
                    continue
                idx += 1
                category_name = category_link.get_text().strip()
                if idx == 2:
                    product_data.crw_category1 = category_name
                elif idx == 3:
                    product_data.crw_category2 = category_name
                elif idx == 4:
                    product_data.crw_category3 = category_name

        # Brand extraction is intentionally not performed.

        # Product link, goods code and image, e.g.
        #   <a href="/goods/view?no=792" target="">
        #     <img src="/data/goods/201606/792_18171332list2.jpg"
        #          onerror="this.src='.../noimage.gif';..."></a>
        for span_ctx in product_ctx.find_all('span',
                                             class_='goodsDisplayImageWrap'):
            product_link_ctx = span_ctx.find('a')
            if product_link_ctx is None:
                continue
            if 'href' not in product_link_ctx.attrs:
                continue

            tmp_product_link = product_link_ctx.attrs['href'].strip()
            if not tmp_product_link.startswith('http'):
                tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                             tmp_product_link)

            crw_post_url = tmp_product_link
            if self.C_PRODUCT_STRIP_STR != '':
                crw_post_url = tmp_product_link.replace(
                    self.C_PRODUCT_STRIP_STR, '')

            # The goods code is everything after "?no=". A link without it
            # raises IndexError, which drops the item.
            split_list = crw_post_url.split('?no=')
            product_data.crw_goods_code = split_list[1].strip()

            for img_ctx in product_link_ctx.find_all('img'):
                # Only images that carry an onerror fallback are considered.
                if 'onerror' not in img_ctx.attrs:
                    continue
                img_src = ''
                if 'data-original' in img_ctx.attrs:
                    img_src = img_ctx.attrs['data-original'].strip()
                elif 'src' in img_ctx.attrs:
                    img_src = img_ctx.attrs['src'].strip()

                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

        # Product name, e.g.
        #   <a href="/goods/view?no=792"><span style="...">name</span></a>
        # The last link that contains a span wins.
        for name_link_ctx in product_ctx.find_all('a'):
            name_span_ctx = name_link_ctx.find('span')
            if name_span_ctx is not None:
                product_data.crw_name = name_span_ctx.get_text().strip()

        # Price / sold-out state. The list price is struck through and the
        # sale price is not:
        #   <span style="...;text-decoration:line-through;"> 21,000 원 </span>
        #   <span style="...;text-decoration:none;"> 15,000 원 </span>
        # A sold-out product shows an icon instead:
        #   <td><img src="/data/icon/goods_status/icon_list_soldout.gif"></td>
        for td_ctx in product_ctx.find_all('td'):
            for soldout_ctx in td_ctx.find_all('img'):
                if 'src' in soldout_ctx.attrs:
                    if 'soldout' in soldout_ctx.attrs['src']:
                        product_data.crw_is_soldout = 1

            # Only the first span of each cell is inspected.
            price_span_ctx = td_ctx.find('span')
            if price_span_ctx is None:
                continue
            span_str = price_span_ctx.get_text().strip()
            if 'style' not in price_span_ctx.attrs or span_str == '':
                continue
            # A price text starts with a digit and contains the won sign.
            if not (span_str[0].isdigit() and '원' in span_str):
                continue

            # Use "in" so a declaration at the very start of the style
            # attribute is recognised as well.
            style_str = price_span_ctx.attrs['style']
            if 'text-decoration:line-through' in style_str:
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(span_str))
            if 'text-decoration:none' in style_str:
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(span_str))

        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, category_path_str,
                     sub_category_str, product_ctx):
    """Build a product record from one listing entry and submit it.

    Categories come from the arguments; image, sold-out state, name,
    detail URL, goods code and prices come from ``product_ctx``. When a
    detail URL was found, the record is passed to ``set_product_data_sub``
    and ``process_product_api``.

    Args:
        page_url: URL of the listing page (not used by this method).
        soup: Parsed listing page (not used by this method).
        category_path_str: Iterable of category names; its 2nd, 3rd and
            4th items become category levels 1, 2 and 3.
        sub_category_str: Extra category name placed one level below the
            path when the path has 2 or 3 items; ignored when empty.
        product_ctx: Parsed element that holds a single product entry.

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__.Error``
        and swallowed, in which case the product is not submitted.
    """
    try:
        item = ProductData()
        detail_url = ''

        self.reset_product_category(item)

        # Category path: the first item is skipped.
        depth = 0
        for depth, path_name in enumerate(category_path_str, start=1):
            if depth == 2:
                item.crw_category1 = path_name
            elif depth == 3:
                item.crw_category2 = path_name
            elif depth == 4:
                item.crw_category3 = path_name

        # The sub category goes right below the last path level.
        if sub_category_str != '':
            if depth == 2:
                item.crw_category2 = sub_category_str
            elif depth == 3:
                item.crw_category3 = sub_category_str

        # Brand extraction (div.brand) is intentionally not performed.

        # Image and sold-out badge, e.g.
        #   <a href="/view/product/G0OS8PKL0KAHNCUJ/YSJRQFSCH" class="itemImg">
        #     <img src="http://.../bd1f6f76.jpg" loading="lazy">
        #     <span class="discountThumb">...</span></a>
        for anchor in product_ctx.find_all('a', class_='itemImg'):
            for img in anchor.find_all('img'):
                attrs = img.attrs
                if 'data-original' in attrs:
                    src = attrs['data-original'].strip()
                elif 'src' in attrs:
                    src = attrs['src'].strip()
                else:
                    src = ''
                if src == '':
                    continue
                full_src = self.set_img_url(self.BASIC_IMAGE_URL, src)
                item.product_img = self.get_hangul_url_convert(full_src)

            for badge in anchor.find_all('span'):
                if '품절' in badge.get_text().strip():
                    item.crw_is_soldout = 1

        # Name, detail link and goods code, e.g.
        #   <div class="itemTit"><p class="name">
        #     <a href="/view/product/G0OS8PKL0KAHNCUJ/YSJRQFSCH">name</a></p></div>
        for title_div in product_ctx.find_all('div', class_='itemTit'):
            name_p = title_div.find('p', class_='name')
            if name_p is None:
                continue
            name_link = name_p.find('a')
            if name_link is None:
                continue
            if 'href' not in name_link.attrs:
                continue

            item.crw_name = name_link.get_text().strip()

            # Anything after a ';' in the href is dropped.
            href_head = name_link.attrs['href'].strip().split(';')[0]
            link = href_head.strip()
            if not link.startswith('http'):
                link = '%s%s' % (self.BASIC_PRODUCT_URL, href_head.strip())

            detail_url = link
            if self.C_PRODUCT_STRIP_STR != '':
                detail_url = link.replace(self.C_PRODUCT_STRIP_STR, '')

            # The goods code is the 7th '/'-separated piece of the URL.
            item.crw_goods_code = detail_url.split('/')[6].strip()

        # Prices, e.g.
        #   <div class="priceWrap">
        #     <span class="primecost"><strong>19,900</strong></span>
        #     <span class="price"><strong>9,900</strong>원</span></div>
        price_fields = {'primecost': 'crw_price', 'price': 'crw_price_sale'}
        for wrap in product_ctx.find_all('div', class_='priceWrap'):
            for span in wrap.find_all('span'):
                if 'class' not in span.attrs:
                    continue
                # Only the first class name decides the kind of price.
                field = price_fields.get(span.attrs['class'][0])
                if field is None:
                    continue
                amount = int(__UTIL__.get_only_digit(span.get_text().strip()))
                setattr(item, field, amount)

        if detail_url != '':
            self.set_product_data_sub(item, detail_url)
            self.process_product_api(item)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, category_key, soup, product_ctx):
    """Build a product record from one listing entry and submit it.

    Reads image, sold-out state, goods code, detail URL, name and prices
    from ``product_ctx``. When a detail URL was found, the record is
    passed to ``set_product_data_sub`` and ``process_product_api``.

    Args:
        category_key: Forwarded to ``get_category_value``.
        soup: Parsed listing page, forwarded to ``get_category_value``.
        product_ctx: Parsed element that holds a single product entry,
            e.g. ``<div class="shopProductWrapper" data-productno="...">``.

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__.Error``
        and swallowed, in which case the product is not submitted.
    """
    try:
        item = ProductData()
        detail_url = ''

        self.reset_product_category(item)

        # Product category.
        self.get_category_value(item, category_key, soup)

        # Image: the URL sits inside the inline style, e.g.
        #   <div class="thumb img" imgsrc="/uploadedFiles/.../image.jpeg"
        #        style="width:100%;background-image:url(https://.../image_1000.jpeg)">
        for thumb in product_ctx.find_all('div', class_='thumb img'):
            if 'style' not in thumb.attrs:
                continue
            style_text = thumb.attrs['style'].strip()
            # Everything after ':url(' with closing parentheses removed.
            img_src = style_text.split(':url(')[1].replace(')', '')
            if img_src == '':
                continue
            full_src = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
            item.product_img = self.get_hangul_url_convert(full_src)

        # Sold-out badge, e.g.
        #   <div class="soldOutBadge badge"><span>Sold Out</span></div>
        if product_ctx.find_all('div', class_='soldOutBadge badge'):
            item.crw_is_soldout = 1

        # Some entries show the sold-out wording inside the price area.
        price_divs = product_ctx.find_all('div', class_='shopProduct price')
        for price_div in price_divs:
            lowered = price_div.get_text().strip().lower()
            if 'sold' in lowered and lowered.find('out') > 0:
                item.crw_is_soldout = 1

        # Goods code, detail link and name.
        if 'data-productno' in product_ctx.attrs:
            item.crw_goods_code = product_ctx.attrs['data-productno']

        anchor = product_ctx.find('a')
        if anchor is not None and 'href' in anchor.attrs:
            link = anchor.attrs['href'].strip()
            if not link.startswith('http'):
                link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                 anchor.attrs['href'].strip())
            detail_url = link
            if self.C_PRODUCT_STRIP_STR != '':
                detail_url = link.replace(self.C_PRODUCT_STRIP_STR, '')

        for name_div in product_ctx.find_all(
                'div', class_='shopProduct productName'):
            item.crw_name = name_div.get_text().strip()

        # Prices, e.g.
        #   <span class="productPriceSpan">20,000원</span>
        # or, when discounted,
        #   <span class="productDiscountPriceSpan">16,200원</span>
        #   <span class="productPriceWithDiscountSpan">18,000원</span>
        price_fields = {
            'productPriceSpan': 'crw_price',
            'productDiscountPriceSpan': 'crw_price_sale',
            'productPriceWithDiscountSpan': 'crw_price',
        }
        for price_div in product_ctx.find_all('div',
                                              class_='shopProduct price'):
            for span in price_div.find_all('span'):
                if 'class' not in span.attrs:
                    continue
                # Only the first class name decides the kind of price.
                field = price_fields.get(span.attrs['class'][0])
                if field is None:
                    continue
                amount = int(__UTIL__.get_only_digit(span.get_text().strip()))
                setattr(item, field, amount)

        if detail_url != '':
            self.set_product_data_sub(item, detail_url)
            self.process_product_api(item)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data_second(self, page_url, product_json):
    """Build a product record from one JSON product entry and register it.

    Categories are taken from ``self.CRW_CATEGORY_1`` .. ``_3``; image,
    name, goods code, detail URL, prices and sold-out state come from
    ``product_json``. When a detail URL was found, the record is passed to
    ``set_product_url_hash``.

    The image and the name each have several candidate keys. They are
    tried in a fixed priority order and the first non-empty value is
    used, so the result does not depend on the key order of
    ``product_json``.

    Args:
        page_url: URL of the listing page (not used by this method).
        product_json: Mapping with the fields of a single product.

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__.Error``
        and swallowed, in which case the product is not registered.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        # Product category.
        product_data.crw_category1 = self.CRW_CATEGORY_1
        product_data.crw_category2 = self.CRW_CATEGORY_2
        product_data.crw_category3 = self.CRW_CATEGORY_3

        # Image: prefer the medium size, then fall back to big and small.
        for img_key in ('image_medium', 'image_big', 'image_small'):
            if img_key in product_json and product_json[img_key]:
                img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                            product_json[img_key])
                product_data.product_img = self.get_hangul_url_convert(
                    img_link)
                break

        # Name: prefer the tag-stripped variant, then fall back.
        for name_key in ('product_name_striptag', 'product_name_tag',
                         'product_name'):
            if name_key in product_json and product_json[name_key]:
                product_data.crw_name = product_json[name_key]
                break

        # Goods code.
        if 'product_no' in product_json:
            product_data.crw_goods_code = str(product_json['product_no'])

        # Detail URL.
        if 'link_product_detail' in product_json:
            tmp_product_link = product_json['link_product_detail']
            if not tmp_product_link.startswith('http'):
                tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                             tmp_product_link)
            crw_post_url = tmp_product_link
            if self.C_PRODUCT_STRIP_STR != '':
                crw_post_url = tmp_product_link.replace(
                    self.C_PRODUCT_STRIP_STR, '')

        # Prices: product_custom is stored as the list price and
        # product_price as the sale price.
        if 'product_custom' in product_json:
            product_data.crw_price = int(product_json['product_custom'])
        if 'product_price' in product_json:
            product_data.crw_price_sale = int(product_json['product_price'])

        # A non-blank sold-out icon marks the product as sold out.
        if 'soldout_icon' in product_json:
            if product_json['soldout_icon'].strip() != '':
                product_data.crw_is_soldout = 1

        if crw_post_url != '':
            self.set_product_url_hash(product_data, crw_post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data_second')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Build a product record from one listing entry and submit it.

    Reads image, detail URL, name, brand, prices and sold-out state from
    ``product_ctx``; goods code and category are filled in by
    ``get_crw_goods_code`` and ``get_category_value`` from the detail URL.
    When a detail URL was found, the record is passed to
    ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page (not used by this method).
        soup: Parsed listing page (not used by this method).
        product_ctx: Parsed element that holds a single product entry.

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__.Error``
        and swallowed, in which case the product is not submitted.
    """
    try:
        item = ProductData()
        detail_url = ''

        # Image, detail link, goods code and category, e.g.
        #   <div class="thumb">
        #     <div class="over_view -mos01"><ul>
        #       <li><a href="javascript:viewdetail('011000000054', '1', '');">...</a></li>
        #       <li><a href="/shop/shopdetail.html?branduid=2243605&xcode=009...">...</a></li>
        #     </ul></div>
        #     <a href="/shop/shopdetail.html?branduid=2243605&xcode=009...">
        #       <img class="MS_prod_img_s" src="/shopimages/.../0110000000543.gif?1581155993"></a>
        #   </div>
        for thumb in product_ctx.find_all('div', class_='thumb'):
            anchors = thumb.find_all('a')

            # First image with a usable src; the query string is cut off.
            for img in thumb.find_all('img'):
                if 'src' not in img.attrs:
                    continue
                src = img.attrs['src'].strip().split('?')[0].strip()
                if src == '':
                    continue
                full_src = self.set_img_url(self.BASIC_IMAGE_URL, src)
                item.product_img = self.get_hangul_url_convert(full_src)
                break

            # First link for which a detail URL can be derived.
            for anchor in anchors:
                if 'href' not in anchor.attrs:
                    continue
                detail_url = self.get_crw_post_url(anchor, 'href')
                if detail_url == '':
                    continue
                self.get_crw_goods_code(item, detail_url)
                self.get_category_value(item, detail_url)
                break

        # Name and brand, e.g. <li class="name">product name</li>
        # A name that starts with "[brand]" also yields the brand.
        for name_li in product_ctx.find_all('li', class_='name'):
            item.crw_name = name_li.get_text().strip()
            if item.crw_name.startswith('['):
                bracket_head = item.crw_name.split(']')[0]
                item.crw_brand1 = bracket_head[1:].strip()

        # Prices / sold-out state, e.g.
        #   <li><div class="over_sale -mos">30%</div>
        #     <span class="price01">39,500원</span>
        #     <span class="price02">27,600원</span></li>
        for ul in product_ctx.find_all('ul'):
            sale_span = ul.find('span', class_='price02')
            list_span = ul.find('span', class_='price01')
            soldout_li = ul.find('li', class_='soldout')

            if soldout_li is not None:
                item.crw_is_soldout = 1

            if list_span is not None:
                list_text = list_span.get_text().strip()
                item.crw_price = int(__UTIL__.get_only_digit(list_text))

            if sale_span is not None:
                sale_text = sale_span.get_text().strip()
                item.crw_price_sale = int(__UTIL__.get_only_digit(sale_text))

        if detail_url != '':
            self.set_product_data_sub(item, detail_url)
            self.process_product_api(item)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Build a product record from one listing entry and submit it.

    Reads image, detail URL, name, brand, prices and sold-out state from
    ``product_ctx``; goods code and category are filled in by
    ``get_crw_goods_code`` and ``get_category_value`` from the detail URL.
    When a detail URL was found, the record is passed to
    ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page (not used by this method).
        soup: Parsed listing page (not used by this method).
        product_ctx: Parsed element that holds a single product entry.

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__.Error``
        and swallowed, in which case the product is not submitted.
    """
    try:
        item = ProductData()
        detail_url = ''

        # Image, detail link, goods code and category, e.g.
        #   <div class="thumb salebox">
        #     <a href="/shop/shopdetail.html?branduid=3534594&xcode=003...">
        #       <img class="MS_prod_img_m" src="/shopimages/.../0030010000152.jpg?1581790516"></a>
        #     <input type="hidden" name="product_price" value="34500">
        #     <div class="info_icon">... quick-view and option icons ...</div>
        #   </div>
        for thumb in product_ctx.find_all('div', class_='thumb salebox'):
            anchors = thumb.find_all('a')

            # First image with a usable src; the query string is cut off.
            for img in thumb.find_all('img'):
                if 'src' not in img.attrs:
                    continue
                src = img.attrs['src'].strip().split('?')[0].strip()
                if src == '':
                    continue
                full_src = self.set_img_url(self.BASIC_IMAGE_URL, src)
                item.product_img = self.get_hangul_url_convert(full_src)
                break

            # First link for which a detail URL can be derived.
            for anchor in anchors:
                if 'href' not in anchor.attrs:
                    continue
                detail_url = self.get_crw_post_url(anchor, 'href')
                if detail_url == '':
                    continue
                self.get_crw_goods_code(item, detail_url)
                self.get_category_value(item, detail_url)
                break

        # Name and brand, e.g. <li class="dsc">product name</li>
        # A name that starts with "[brand]" also yields the brand.
        for name_li in product_ctx.find_all('li', class_='dsc'):
            item.crw_name = name_li.get_text().strip()
            if item.crw_name.startswith('['):
                bracket_head = item.crw_name.split(']')[0]
                item.crw_brand1 = bracket_head[1:].strip()

        # Prices / sold-out state, e.g.
        #   <ul class="info">
        #     <li class="dsc">name</li>
        #     <li class="consumer">26,000원</li> <li class="price">13,000원</li>
        #   </ul>
        # A sold-out product has <li class="soldout">SOLD OUT</li> instead
        # of the two price items.
        for ul in product_ctx.find_all('ul'):
            sale_li = ul.find('li', class_='price')
            list_li = ul.find('li', class_='consumer')
            soldout_li = ul.find('li', class_='soldout')

            if soldout_li is not None:
                item.crw_is_soldout = 1

            if list_li is not None:
                list_text = list_li.get_text().strip()
                item.crw_price = int(__UTIL__.get_only_digit(list_text))

            if sale_li is not None:
                sale_text = sale_li.get_text().strip()
                item.crw_price_sale = int(__UTIL__.get_only_digit(sale_text))

        if detail_url != '':
            self.set_product_data_sub(item, detail_url)
            self.process_product_api(item)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Extract one product from a listing entry and submit it.

    Reads image, detail URL, name, brand, prices and sold-out state from
    ``product_ctx``; goods code and category are filled in by
    ``get_crw_goods_code`` and ``get_category_value`` from the detail URL.
    When a detail URL was found, the record is passed to
    ``set_product_data_sub`` and ``process_product_api``.

    Args:
        page_url: URL of the listing page (not used by this method).
        soup: Parsed listing page (not used by this method).
        product_ctx: Parsed element that holds a single product entry.

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__.Error``
        and swallowed, in which case the product is not submitted.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        # Image, detail link, goods code and category, e.g.
        #   <div class="thumb">
        #     <a href="/shop/shopdetail.html?branduid=1000006164&xcode=007...">
        #       <img class="MS_prod_img_m" src="/shopimages/.../0070060000702.jpg?1589180862"></a>
        #   </div>
        for img_div_ctx in product_ctx.find_all('div', class_='thumb'):
            product_link_list = img_div_ctx.find_all('a')

            # First image with a usable src; the query string is cut off.
            for img_ctx in img_div_ctx.find_all('img'):
                if 'src' not in img_ctx.attrs:
                    continue
                img_src = img_ctx.attrs['src'].strip().split('?')[0].strip()
                if img_src == '':
                    continue
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                product_data.product_img = self.get_hangul_url_convert(
                    img_link)
                break

            # First link for which a detail URL can be derived.
            for product_link_ctx in product_link_list:
                if 'href' not in product_link_ctx.attrs:
                    continue
                crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
                if crw_post_url == '':
                    continue
                self.get_crw_goods_code(product_data, crw_post_url)
                self.get_category_value(product_data, crw_post_url)
                break

        # Name and brand, e.g.
        #   <li class="name"><span class="MK-product-icons"></span> name</li>
        # A name that starts with "[brand]" also yields the brand.
        for name_ctx in product_ctx.find_all('li', class_='name'):
            product_data.crw_name = name_ctx.get_text().strip()
            if product_data.crw_name.startswith('['):
                brand_list = product_data.crw_name.split(']')
                product_data.crw_brand1 = brand_list[0][1:].strip()

        # Prices / sold-out state. The list price sits in a nested span and
        # the sale price is the remaining text of the item:
        #   <li class="price"><span><s>32,000</s>원</span> 32,000원</li>
        # A sold-out product shows only a label:
        #   <li class="price">Sold Out</li>
        for price_ctx in product_ctx.find_all('li', class_='price'):
            whole_text = price_ctx.get_text().strip()

            consumer_price = ''
            consumer_ctx = price_ctx.find('span')
            if consumer_ctx is not None:
                consumer_price = consumer_ctx.get_text().strip()
                # NOTE(review): assumes get_only_digit returns '' when the
                # text holds no digit -- confirm against the helper.
                consumer_digits = __UTIL__.get_only_digit(consumer_price)
                if consumer_digits != '':
                    product_data.crw_price = int(consumer_digits)

            # Remove the list price only once: when list and sale price
            # are equal, removing every occurrence would also erase the
            # sale price.
            sale_text = whole_text
            if consumer_price != '':
                sale_text = whole_text.replace(consumer_price, '', 1)
            sale_text = sale_text.strip()

            # Detect the sold-out label before parsing, so that a label
            # without digits cannot abort the item.
            if 'Out' in sale_text:
                product_data.crw_is_soldout = 1

            sale_digits = __UTIL__.get_only_digit(sale_text)
            if sale_digits != '':
                product_data.crw_price_sale = int(sale_digits)

        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Build a product record from one listing entry and register it.

    The category path is looked up in ``self.PAGE_URL_HASH`` by
    ``page_url``; image, name, detail URL, goods code, brand and price
    come from ``product_ctx``, and the sold-out state is delegated to
    ``set_product_soldout_first``. When a detail URL was found, the record
    is passed to ``set_product_url_hash``.

    Args:
        page_url: URL of the listing page; key into ``self.PAGE_URL_HASH``.
        soup: Parsed listing page (not used by this method).
        product_ctx: Parsed element that holds a single product entry.

    Returns:
        Always ``True``. Any exception is logged through ``__LOG__.Error``
        and swallowed, in which case the product is not registered.
    """
    try:
        item = ProductData()
        detail_url = ''

        # Category: the stored value is a '|'-separated path whose first
        # three parts become category levels 1, 2 and 3.
        category_attrs = ('crw_category1', 'crw_category2', 'crw_category3')
        category_parts = self.PAGE_URL_HASH[page_url].split('|')
        for attr_name, part in zip(category_attrs, category_parts):
            setattr(item, attr_name, part.strip())

        # Image: the first non-empty img src found inside a link is kept.
        # NOTE(review): the links are searched in the whole product entry,
        # once per thumbnail div, not inside the thumbnail div itself --
        # confirm whether that is intended.
        for _thumb in product_ctx.find_all('div', class_='thumbnail'):
            for anchor in product_ctx.find_all('a'):
                for img in anchor.find_all('img'):
                    if 'src' not in img.attrs:
                        continue
                    src = img.attrs['src'].strip()
                    if src == '':
                        continue
                    full_src = self.set_img_url(self.BASIC_IMAGE_URL, src)
                    if item.product_img == '':
                        item.product_img = self.get_hangul_url_convert(
                            full_src)

        # Sold-out state.
        self.set_product_soldout_first(item, product_ctx)

        # Name, detail link and goods code.
        for name_strong in product_ctx.find_all('strong', class_='name'):
            for anchor in name_strong.find_all('a'):
                if 'href' not in anchor.attrs:
                    continue

                item.crw_name = anchor.get_text().strip()

                link = anchor.attrs['href'].strip()
                if not link.startswith('http'):
                    link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                     anchor.attrs['href'].strip())

                detail_url = link
                if self.C_PRODUCT_STRIP_STR != '':
                    detail_url = link.replace(self.C_PRODUCT_STRIP_STR, '')

                # The 5th '/'-separated piece is the name fallback and the
                # 6th piece is the goods code.
                url_parts = detail_url.split('/')
                if item.crw_name == '':
                    item.crw_name = url_parts[4].strip()
                item.crw_goods_code = url_parts[5].strip()

        # Brand and price: only elements whose class attribute is exactly
        # one name are considered.
        for desc_div in product_ctx.find_all('div', class_='description'):
            for span in desc_div.find_all('span'):
                if 'class' not in span.attrs:
                    continue
                classes = span.attrs['class']
                if len(classes) == 1 and classes[0].strip() == 'summary':
                    item.crw_brand1 = span.get_text().strip()

            for li in desc_div.find_all('li'):
                if 'class' not in li.attrs:
                    continue
                classes = li.attrs['class']
                if len(classes) == 1 and classes[0].strip() == 'price':
                    price_text = li.get_text().strip()
                    item.crw_price = int(__UTIL__.get_only_digit(price_text))

        if detail_url != '':
            self.set_product_url_hash(item, detail_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Scrape one listing entry into a ProductData and submit it.

    Categories are read from the ``#menu_inner`` breadcrumb of ``soup``;
    sold-out flag, image, name, brand, goods code and prices are read from
    ``product_ctx``.

    Args:
        page_url: Listing page URL. Not used by this variant.
        soup: Parsed listing page (source of the breadcrumb).
        product_ctx: Parsed node of a single product on the listing page.

    Returns:
        Always True. Any exception is logged through ``__LOG__`` and
        swallowed, so the entry is then not submitted.
    """
    try:
        product = ProductData()
        post_url = ''
        self.reset_product_category(product)

        # Breadcrumb text is split on '>'; the first segment is skipped and
        # segments 2-4 become category levels 1-3.
        category_attrs = ('crw_category1', 'crw_category2', 'crw_category3')
        for crumb in soup.select('#menu_inner'):
            segments = crumb.get_text().strip().split('>')
            for attr, segment in zip(category_attrs, segments[1:]):
                setattr(product, attr, segment.strip())

        # Sold out when a <span class="soldOut"> is present.
        if product_ctx.find('span', class_='soldOut') is not None:
            product.crw_is_soldout = 1

        # Product image:
        #   <div class="thumbnail"> ... <a href="..."><img src="..."></a>
        # 'data-original' takes precedence over 'src'.
        for thumbnail in product_ctx.find_all('div', class_='thumbnail'):
            anchor = thumbnail.find('a')
            if anchor is None:
                continue
            for img in anchor.find_all('img'):
                if 'data-original' in img.attrs:
                    src = img.attrs['data-original'].strip()
                elif 'src' in img.attrs:
                    src = img.attrs['src'].strip()
                else:
                    src = ''
                if src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, src)
                    product.product_img = self.get_hangul_url_convert(
                        img_link)

        # Name / link / goods code:
        #   <div class="title"><a href="prd_detail.php?idx=171&...">name</a>
        # NOTE(review): the name/brand parsing is assumed to sit inside the
        # href check, like the link parsing - confirm against the original.
        for title in product_ctx.find_all('div', class_='title'):
            anchor = title.find('a')
            if anchor is None or 'href' not in anchor.attrs:
                continue

            href = anchor.attrs['href'].strip()
            if href.startswith('http'):
                link = href
            else:
                link = '%s%s' % (self.BASIC_PRODUCT_URL, href)

            if self.C_PRODUCT_STRIP_STR != '':
                post_url = link.replace(self.C_PRODUCT_STRIP_STR, '')
            else:
                post_url = link

            # Goods code is the value of the 'idx' query parameter.
            after_idx = post_url.split('?idx=')[1].strip()
            product.crw_goods_code = after_idx.split('&')[0]

            # "[brand] name" -> brand + name; anything else is all name.
            pieces = anchor.get_text().strip().split(']')
            if len(pieces) == 2:
                product.crw_brand1 = pieces[0].replace('[', '').strip()
                product.crw_name = pieces[1].strip()
            else:
                product.crw_name = pieces[0].strip()

        # Prices:
        #   <div class="priceWrap">
        #     <div class="saleprice"><span>119,000</span></div>  -> crw_price
        #     <div class="price"><strong>101,150</strong></div>  -> crw_price_sale
        for wrap in product_ctx.find_all('div', class_='priceWrap'):
            list_price_node = wrap.find('div', class_='saleprice')
            sale_price_node = wrap.find('div', class_='price')
            if list_price_node is not None:
                product.crw_price = int(__UTIL__.get_only_digit(
                    list_price_node.get_text().strip()))
            if sale_price_node is not None:
                product.crw_price_sale = int(__UTIL__.get_only_digit(
                    sale_price_node.get_text().strip()))

        if post_url != '':
            self.set_product_data_sub(product, post_url)
            self.process_product_api(product)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Scrape one listing entry into a ProductData and submit it.

    Categories are taken from ``self.PAGE_URL_HASH[page_url]`` (only when
    ``self.C_DETAIL_CATEGORY_VALUE`` is non-blank); image, sold-out flag,
    name, goods code and prices are read from ``product_ctx``.

    Args:
        page_url: Key into ``self.PAGE_URL_HASH`` ('|'-separated path).
        soup: Parsed listing page. Not used by this variant.
        product_ctx: Parsed node of a single product on the listing page.

    Returns:
        Always True. Any exception is logged through ``__LOG__`` and
        swallowed, so the entry is then not submitted.
    """
    try:
        product = ProductData()
        post_url = ''
        self.reset_product_category(product)

        # Category levels 1-3 (values are stored without stripping).
        if self.C_DETAIL_CATEGORY_VALUE.strip() != '':
            category_attrs = (
                'crw_category1', 'crw_category2', 'crw_category3')
            levels = self.PAGE_URL_HASH[page_url].split('|')
            for attr, level in zip(category_attrs, levels):
                setattr(product, attr, level)

        # Product image: first <img> inside each <div class="prdimg">.
        for holder in product_ctx.find_all('div', class_='prdimg'):
            img = holder.find('img')
            if img is None:
                continue
            src = img.attrs['src'].strip() if 'src' in img.attrs else ''
            if src != '':
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, src)
                product.product_img = self.get_hangul_url_convert(img_link)

        # Sold out when the product node's first class is 'soldout'.
        if 'class' in product_ctx.attrs:
            if product_ctx.attrs['class'][0] == 'soldout':
                product.crw_is_soldout = 1

        # Name / link / goods code:
        #   <p class="name"><a href=".../detail.php?pno=BEED...&...">name</a></p>
        for name_node in product_ctx.find_all('p', class_='name'):
            anchor = name_node.find('a')
            if anchor is None or 'href' not in anchor.attrs:
                continue

            product.crw_name = anchor.get_text().strip()

            href = anchor.attrs['href'].strip()
            if href.startswith('http'):
                link = href
            else:
                link = '%s%s' % (self.BASIC_PRODUCT_URL, href)

            if self.C_PRODUCT_STRIP_STR != '':
                post_url = link.replace(self.C_PRODUCT_STRIP_STR, '')
            else:
                post_url = link

            # Goods code is the value of the 'pno' query parameter.
            after_pno = post_url.split('?pno=')[1]
            product.crw_goods_code = after_pno.split('&')[0].strip()

        # Prices:
        #   <div class="price">
        #     <p class="consumer consumer">KRW 24,000</p>        -> crw_price
        #     <p class="sell sell"><strong>KRW 22,800</strong></p> -> crw_price_sale
        for price_box in product_ctx.find_all('div', class_='price'):
            sell_node = price_box.find('p', class_='sell')
            if sell_node is not None:
                product.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_node.get_text().strip()))

            consumer_node = price_box.find('p', class_='consumer')
            if consumer_node is not None:
                product.crw_price = int(__UTIL__.get_only_digit(
                    consumer_node.get_text().strip()))

        if post_url != '':
            self.set_product_data_sub(product, post_url)
            self.process_product_api(product)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Scrape one listing entry into a ProductData and submit it.

    Categories are read from the anchors inside ``#sct_location`` of
    ``soup``; image, sold-out flag, name, goods code and prices are read
    from ``product_ctx``.

    Args:
        page_url: Listing page URL. Not used by this variant.
        soup: Parsed listing page (source of the breadcrumb anchors).
        product_ctx: Parsed node of a single product on the listing page.

    Returns:
        Always True. Any exception is logged through ``__LOG__`` and
        swallowed, so the entry is then not submitted.
    """
    try:
        product = ProductData()
        post_url = ''
        self.reset_product_category(product)

        # The first breadcrumb anchor is skipped; anchors 2-4 become
        # category levels 1-3.
        category_attrs = ('crw_category1', 'crw_category2', 'crw_category3')
        for location in soup.select('#sct_location'):
            anchors = location.find_all('a')
            for attr, anchor in zip(category_attrs, anchors[1:]):
                setattr(product, attr, anchor.get_text().strip())

        # Product image:
        #   <div class="sct_img"><a href=".../item.php?it_id=..."><img src="..."></a>
        # Only the first anchor of the div is inspected; 'data-original'
        # takes precedence over 'src'.
        for img_box in product_ctx.find_all('div', class_='sct_img'):
            anchor = img_box.find('a')
            if anchor is None:
                continue
            for img in anchor.find_all('img'):
                if 'data-original' in img.attrs:
                    src = img.attrs['data-original'].strip()
                elif 'src' in img.attrs:
                    src = img.attrs['src'].strip()
                else:
                    src = ''
                if src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, src)
                    product.product_img = self.get_hangul_url_convert(
                        img_link)

        # Sold out:
        #   <div class="sct_icon">...<span class="shop_icon_soldout">Sold Out</span>
        for icon_box in product_ctx.find_all('div', class_='sct_icon'):
            if icon_box.find('span', class_='shop_icon_soldout') is not None:
                product.crw_is_soldout = 1

        # Name / link / goods code:
        #   <div class="sct_txt"><a href=".../item.php?it_id=1585815848">name</a></div>
        name_box = product_ctx.find('div', class_='sct_txt')
        anchor = name_box.find('a') if name_box is not None else None
        if anchor is not None and 'href' in anchor.attrs:
            product.crw_name = anchor.get_text().strip()

            href = anchor.attrs['href'].strip()
            if href.startswith('http'):
                link = href
            else:
                link = '%s%s' % (self.BASIC_PRODUCT_URL, href)

            if self.C_PRODUCT_STRIP_STR != '':
                post_url = link.replace(self.C_PRODUCT_STRIP_STR, '')
            else:
                post_url = link

            # Goods code is the value of the 'it_id' query parameter.
            after_id = post_url.split('?it_id=')[1].strip()
            product.crw_goods_code = after_id.split('&')[0]

        # Prices:
        #   <div class="sct_cost">
        #     <span class="sct_discount">70,000</span>   -> crw_price
        #     62,900                                      -> crw_price_sale
        #   </div>
        cost_box = product_ctx.find('div', class_='sct_cost')
        if cost_box is not None:
            discount_node = product_ctx.find('span', class_='sct_discount')
            if discount_node is None:
                product.crw_price_sale = int(
                    __UTIL__.get_only_digit(cost_box.get_text().strip()))
            else:
                product.crw_price = int(__UTIL__.get_only_digit(
                    discount_node.get_text().strip()))
                # The sale price is whatever follows the struck-out price
                # in the box text, so cut off as many leading characters
                # as the struck-out text is long.
                skip = len(discount_node.get_text().strip())
                remainder = cost_box.get_text().strip()[skip:].strip()
                product.crw_price_sale = int(
                    __UTIL__.get_only_digit(remainder))

        if post_url != '':
            self.set_product_data_sub(product, post_url)
            self.process_product_api(product)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Scrape one listing entry into a ProductData and submit it.

    Categories are filled by ``self.get_category_value``; brand, image,
    goods code, name and prices are read from ``product_ctx``.

    Args:
        page_url: Listing page URL, forwarded to ``get_category_value``.
        soup: Parsed listing page, forwarded to ``get_category_value``.
        product_ctx: Parsed node of a single product on the listing page.

    Returns:
        Always True. Any exception is logged through ``__LOG__`` and
        swallowed, so the entry is then not submitted.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        self.reset_product_category(product_data)
        self.get_category_value(product_data, page_url, soup)

        # Brand: <div class="brand">SALLYS LAW</div>
        for brand_ctx in product_ctx.find_all('div', class_='brand'):
            product_data.crw_brand1 = brand_ctx.get_text().strip()

        # Product image:
        #   <div class="img"><img src="//image.../300972496.jpg?RS=300"></div>
        # 'data-original' takes precedence over 'src'; the query string is
        # dropped.
        # NOTE(review): the query-string strip is assumed to apply to both
        # attributes - confirm against the original formatting.
        for img_div_ctx in product_ctx.find_all('div', class_='img'):
            for img_ctx in img_div_ctx.find_all('img'):
                img_src = ''
                if 'data-original' in img_ctx.attrs:
                    img_src = img_ctx.attrs['data-original'].strip()
                elif 'src' in img_ctx.attrs:
                    img_src = img_ctx.attrs['src'].strip()

                img_src = img_src.split('?')[0].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

        # Link / goods code: <a href="/Product/300972496">
        product_link_ctx = product_ctx.find('a')
        if product_link_ctx is not None and 'href' in product_link_ctx.attrs:
            href = product_link_ctx.attrs['href'].strip()
            tmp_product_link = href
            if not href.startswith('http'):
                tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL, href)

            crw_post_url = tmp_product_link
            if self.C_PRODUCT_STRIP_STR != '':
                crw_post_url = tmp_product_link.replace(
                    self.C_PRODUCT_STRIP_STR, '')

            # Segment 4 of the '/'-split absolute URL is the goods code.
            product_data.crw_goods_code = crw_post_url.split('/')[4].strip()

        # Product name.
        for name_ctx in product_ctx.find_all(
                'div', class_='product ellipsis multiline'):
            product_data.crw_name = name_ctx.get_text().strip()

        # Prices:
        #   <div class="price">
        #     <span class="discount_price">74,400</span>  -> crw_price_sale
        #     <span class="base_price">93,000</span>      -> crw_price
        #     <span class="discount_rate">20%</span>
        #   </div>
        for div_ctx in product_ctx.find_all('div', class_='price'):
            for span_ctx in div_ctx.find_all('span'):
                # An empty class attribute yields an empty list; skip it
                # instead of letting an IndexError abort the whole product.
                class_name_list = span_ctx.attrs.get('class')
                if not class_name_list:
                    continue

                first_class = class_name_list[0]
                if first_class == 'base_price':
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(span_ctx.get_text().strip()))
                elif first_class == 'discount_price':
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(span_ctx.get_text().strip()))

        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Scrape one listing entry into a ProductData and submit it once.

    Category level 1 is ``self.PAGE_URL_HASH[page_url]`` as stored; image,
    name, goods code and prices are read from ``product_ctx``. The record
    is submitted only when its URL is not yet in ``self.PRODUCT_URL_HASH``.

    Args:
        page_url: Key into ``self.PAGE_URL_HASH``.
        soup: Parsed listing page. Not used by this variant.
        product_ctx: Parsed node of a single product on the listing page.

    Returns:
        Always True. Any exception is logged through ``__LOG__`` and
        swallowed, so the entry is then not submitted.
    """
    try:
        product = ProductData()
        post_url = ''

        product.crw_category1 = self.PAGE_URL_HASH[page_url]

        # Product image inside <div class="goodsimg">; 'data-original'
        # takes precedence over 'src'. A leading '..' is rewritten to
        # '/shop'.
        # NOTE(review): the '..' rewrite is assumed to apply to both
        # attributes - confirm against the original formatting.
        for img_box in product_ctx.find_all('div', class_='goodsimg'):
            for img in img_box.find_all('img'):
                if 'data-original' in img.attrs:
                    src = img.attrs['data-original'].strip()
                elif 'src' in img.attrs:
                    src = img.attrs['src'].strip()
                else:
                    src = ''

                if src.startswith('..'):
                    src = '/shop%s' % src[2:]

                if src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, src)
                    product.product_img = self.get_hangul_url_convert(
                        img_link)

        # Name / link / goods code from <div class="goods_m_name"><a>.
        for name_box in product_ctx.find_all('div', class_='goods_m_name'):
            for anchor in name_box.find_all('a'):
                if 'href' not in anchor.attrs:
                    continue

                product.crw_name = anchor.get_text().strip()

                href = anchor.attrs['href'].strip()
                if href.startswith('http'):
                    link = href
                else:
                    link = '%s%s' % (self.BASIC_PRODUCT_URL, href)

                if self.C_PRODUCT_STRIP_STR != '':
                    post_url = link.replace(self.C_PRODUCT_STRIP_STR, '')
                else:
                    post_url = link

                # Goods code is the value of the 'goodsno' parameter.
                after_no = post_url.split('?goodsno=')[1].strip()
                product.crw_goods_code = after_no.split('&')[0].strip()

        # Prices: every <div> under the product is inspected; <b> holds the
        # sale price and <strike> the list price. Later divs overwrite
        # earlier ones.
        for box in product_ctx.find_all('div'):
            sale_node = box.find('b')
            list_node = box.find('strike')
            if sale_node is not None:
                product.crw_price_sale = int(
                    __UTIL__.get_only_digit(sale_node.get_text().strip()))
            if list_node is not None:
                product.crw_price = int(
                    __UTIL__.get_only_digit(list_node.get_text().strip()))

        # NOTE(review): both calls are assumed to sit inside the
        # "URL not seen yet" check - confirm against the original formatting.
        already_seen = self.PRODUCT_URL_HASH.get(post_url, -1) != -1 \
            if post_url != '' else True
        if not already_seen:
            self.set_product_data_sub(product, post_url)
            self.process_product_api(product)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Scrape one listing entry into a ProductData and submit it.

    Image, link, goods code and categories come from the ``dt.thumb``
    node, name and brand from ``dd.prd-info``, prices and the sold-out
    flag from ``p.price-info``.

    Args:
        page_url: Listing page URL. Not used by this variant.
        soup: Parsed listing page. Not used by this variant.
        product_ctx: Parsed node of a single product on the listing page.

    Returns:
        Always True. Any exception is logged through ``__LOG__`` and
        swallowed, so the entry is then not submitted.
    """
    try:
        product = ProductData()
        post_url = ''

        # Image / link / goods code / category:
        #   <dt class="thumb"><a href="/shop/shopdetail.html?branduid=...">
        #     <img class="MS_prod_img_m" src="/shopimages/.../x.jpg?1591754112">
        #   </a></dt>
        for thumb in product_ctx.find_all('dt', class_='thumb'):
            # First <img> with a non-empty src (query string removed) wins.
            for img in thumb.find_all('img'):
                if 'src' not in img.attrs:
                    continue
                src = img.attrs['src'].strip().split('?')[0].strip()
                if src == '':
                    continue
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, src)
                product.product_img = self.get_hangul_url_convert(img_link)
                break

            anchor = thumb.find('a')
            if anchor is None or 'href' not in anchor.attrs:
                continue
            post_url = self.get_crw_post_url(anchor, 'href')
            if post_url != '':
                self.get_crw_goods_code(product, post_url)
                self.get_category_value(product, post_url)

        # Name and brand:
        #   <dd class="prd-info"><ul>
        #     <li class="prd-brand">...</li>
        #     <li class="prd-name"><a href="...">name</a></li>
        #   </ul></dd>
        for info in product_ctx.find_all('dd', class_='prd-info'):
            name_node = info.find('li', class_='prd-name')
            if name_node is not None:
                product.crw_name = name_node.get_text().strip()

            brand_node = info.find('li', class_='prd-brand')
            if brand_node is not None:
                product.crw_brand1 = brand_node.get_text().strip()

        # Prices / sold out:
        #   <p class="price-info">
        #     <strike>10,000</strike>          -> crw_price
        #     <span class="price">9,000</span> -> crw_price_sale
        #   </p>
        # A sold-out item shows the text "Sold Out" instead.
        for price_info in product_ctx.find_all('p', class_='price-info'):
            if 'Out' in price_info.get_text().strip():
                product.crw_is_soldout = 1

            sale_node = price_info.find('span', class_='price')
            list_node = price_info.find('strike')
            if list_node is not None:
                product.crw_price = int(
                    __UTIL__.get_only_digit(list_node.get_text().strip()))
            if sale_node is not None:
                product.crw_price_sale = int(
                    __UTIL__.get_only_digit(sale_node.get_text().strip()))

        if post_url != '':
            self.set_product_data_sub(product, post_url)
            self.process_product_api(product)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Scrape one listing entry into a ProductData and submit it.

    Categories are read from the breadcrumb ``<ul>`` of ``soup``; goods
    code and image from ``div.goodsDisplayImageWrap``; name from
    ``span.goods_name``; prices from labelled ``<li>`` rows. The product
    URL is built as ``SITE_HOME + '/goods/view?no=' + goods_code`` because
    the anchor only carries an ``onclick`` handler.

    Args:
        page_url: Listing page URL. Not used by this variant.
        soup: Parsed listing page (source of the breadcrumb).
        product_ctx: Parsed node of a single product on the listing page.

    Returns:
        Always True. Any exception is logged through ``__LOG__`` and
        swallowed, so the entry is then not submitted.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        self.reset_product_category(product_data)

        # Breadcrumb text is split on '>'; the first segment is skipped and
        # segments 2-4 become category levels 1-3.
        category_attrs = ('crw_category1', 'crw_category2', 'crw_category3')
        breadcrumb_selector = (
            '#layout_config_full > div.category_depth.clearbox > ul')
        for category_ctx in soup.select(breadcrumb_selector):
            segments = category_ctx.get_text().strip().split('>')
            for attr, segment in zip(category_attrs, segments[1:]):
                setattr(product_data, attr, segment.strip())

        # Goods code / link / image:
        #   <a href="javascript:void(0)"
        #      onclick="display_goods_view('196','',this,'goods_view')">
        call_marker = 'display_goods_view('
        for wrap_ctx in product_ctx.find_all(
                'div', class_='goodsDisplayImageWrap'):
            product_link_ctx = wrap_ctx.find('a')
            if product_link_ctx is None:
                continue

            onclick = product_link_ctx.attrs.get('onclick', '')
            # Skip handlers that are not the expected call instead of
            # failing on the split below.
            if call_marker in onclick:
                first_arg = onclick.split(call_marker)[1].split(',')[0]
                product_data.crw_goods_code = first_arg.replace(
                    "'", "").strip()

                tmp_product_link = (self.SITE_HOME + '/goods/view?no=' +
                                    product_data.crw_goods_code)
                if not tmp_product_link.startswith('http'):
                    # The wrapper <div> has no href; prefix the link that
                    # was just built (the old code read attrs['href'] here
                    # and raised KeyError).
                    tmp_product_link = '%s%s' % (
                        self.BASIC_PRODUCT_URL, tmp_product_link)

                crw_post_url = tmp_product_link
                if self.C_PRODUCT_STRIP_STR != '':
                    crw_post_url = tmp_product_link.replace(
                        self.C_PRODUCT_STRIP_STR, '')

            # 'data-original' takes precedence over 'src'.
            # NOTE(review): the image scan is assumed to run for every
            # anchor, not only those with an onclick handler - confirm.
            for img_ctx in product_link_ctx.find_all('img'):
                img_src = ''
                if 'data-original' in img_ctx.attrs:
                    img_src = img_ctx.attrs['data-original'].strip()
                elif 'src' in img_ctx.attrs:
                    img_src = img_ctx.attrs['src'].strip()

                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

        # Name: <span class="goods_name">line 1 <br>line 2</span>
        name_ctx = product_ctx.find('span', class_='goods_name')
        if name_ctx is not None:
            product_data.crw_name = name_ctx.get_text().replace(
                '\n', ' ').strip()

        # Prices:
        #   <li><span class="price_txt">label</span>
        #       <span class="sale_price">15,000</span></li>
        # The label decides which price field the value belongs to.
        for li_ctx in product_ctx.find_all('li'):
            title_ctx = li_ctx.find('span', class_='price_txt')
            value_ctx = li_ctx.find('span', class_='sale_price')
            if title_ctx is None or value_ctx is None:
                continue

            title_name = title_ctx.get_text().strip()
            title_value = value_ctx.get_text().strip()
            if title_name == '판매가':
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(title_value))
            elif title_name == '이벤트가':
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(title_value))

        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Scrape one listing entry into a ProductData and submit it.

    Categories are taken from ``self.PAGE_URL_HASH[page_url]``; image,
    sold-out flag, name, goods code and prices are read from
    ``product_ctx``.

    Args:
        page_url: Key into ``self.PAGE_URL_HASH`` ('|'-separated path).
        soup: Parsed listing page. Not used by this variant.
        product_ctx: Parsed node of a single product on the listing page.

    Returns:
        Always True. Any exception is logged through ``__LOG__`` and
        swallowed, so the entry is then not submitted.
    """
    try:
        product = ProductData()
        post_url = ''
        self.reset_product_category(product)

        # Category levels 1-3 (values are stored without stripping).
        category_path = self.PAGE_URL_HASH[page_url]
        __LOG__.Trace(category_path)
        category_attrs = ('crw_category1', 'crw_category2', 'crw_category3')
        for attr, level in zip(category_attrs, category_path.split('|')):
            setattr(product, attr, level)

        # Product image:
        #   <div class="prdimg"><a href="..."><img src="https://.../x.jpg"></a></div>
        for holder in product_ctx.find_all('div', class_='prdimg'):
            img = holder.find('img')
            if img is None:
                continue
            src = img.attrs['src'].strip() if 'src' in img.attrs else ''
            if src != '':
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, src)
                product.product_img = self.get_hangul_url_convert(img_link)

        # Sold out when the product node is exactly <div class="box out">,
        # i.e. two classes with 'out' as the second one.
        if 'class' in product_ctx.attrs:
            node_classes = product_ctx.attrs['class']
            if len(node_classes) == 2 and node_classes[1] == 'out':
                product.crw_is_soldout = 1

        # Name / link / goods code:
        #   <div class="name"><a href=".../detail.php?pno=41AE...&...">name</a>
        #     <span class="wish"><a href="#">...</a></span></div>
        # Only the first anchor of each name div is used.
        for name_box in product_ctx.find_all('div', class_='name'):
            anchor = name_box.find('a')
            if anchor is None or 'href' not in anchor.attrs:
                continue

            product.crw_name = anchor.get_text().strip()

            href = anchor.attrs['href'].strip()
            if href.startswith('http'):
                link = href
            else:
                link = '%s%s' % (self.BASIC_PRODUCT_URL, href)

            if self.C_PRODUCT_STRIP_STR != '':
                post_url = link.replace(self.C_PRODUCT_STRIP_STR, '')
            else:
                post_url = link

            # Goods code is the value of the 'pno' query parameter.
            after_pno = post_url.split('?pno=')[1]
            product.crw_goods_code = after_pno.split('&')[0].strip()

        # Prices:
        #   <div class="price">
        #     <span class="consumer">...</span>                   -> crw_price
        #     <span class="sell"><span class="font">98,000</span> -> crw_price_sale
        #   </div>
        for price_box in product_ctx.find_all('div', class_='price'):
            sell_node = price_box.find('span', class_='sell')
            consumer_node = price_box.find('span', class_='consumer')

            if consumer_node is not None:
                product.crw_price = int(__UTIL__.get_only_digit(
                    consumer_node.get_text().strip()))

            if sell_node is not None:
                # During a time sale extra text follows on later lines, so
                # only the first line of the text is parsed.
                first_line = sell_node.get_text().strip().split('\n')[0]
                product.crw_price_sale = int(
                    __UTIL__.get_only_digit(first_line.strip()))

        if post_url != '':
            self.set_product_data_sub(product, post_url)
            self.process_product_api(product)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product tile of a listing page and record it.

    Reads the product link, goods code, thumbnail, sold-out state, brand,
    name and prices out of ``product_ctx`` and, when a product URL was
    found, hands the result to ``self.set_product_url_hash``.

    Args:
        page_url: Not used by this implementation.
        soup: Parsed listing page; passed to
            ``self.set_product_category_third``.
        product_ctx: Tag of the single product tile to parse.

    Returns:
        Always ``True``. Any exception raised while parsing is logged
        through ``__LOG__.Error`` and swallowed, in which case the product
        is not recorded.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        # Product category.
        self.set_product_category_third(product_data, soup)

        # Product link / goods code / thumbnail.  Expected markup:
        #   <a href="/product/detail.html?product_no=417&cate_no=29..."
        #      name="anchorBoxName_417">
        #     <img data-original="//host/web/product/medium/....jpg" ...>
        #   </a>
        for img_link_ctx in product_ctx.find_all('a'):
            if 'anchorBoxName_' not in img_link_ctx.attrs.get('name', ''):
                continue

            href = img_link_ctx.attrs.get('href')
            if href is None:
                # An anchor without href cannot supply a product URL.
                continue

            tmp_product_link = href.strip()
            if not tmp_product_link.startswith('http'):
                # Relative link: prefix the shop's base product URL.
                tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                             href.strip())

            crw_post_url = tmp_product_link
            if self.C_PRODUCT_STRIP_STR != '':
                crw_post_url = tmp_product_link.replace(
                    self.C_PRODUCT_STRIP_STR, '')

            # Goods code is the value of the ``product_no`` query argument.
            # A link without ``?product_no=`` raises IndexError here, which
            # the handler below logs; the product is then not recorded.
            split_list = crw_post_url.split('?product_no=')
            crw_goods_code_list = split_list[1].strip().split('&')
            product_data.crw_goods_code = crw_goods_code_list[0].strip()

            img_ctx = img_link_ctx.find('img')
            if img_ctx is not None and 'data-original' in img_ctx.attrs:
                img_src = img_ctx.attrs['data-original'].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                img_src)
                    # Keep the first image found; later anchors do not
                    # overwrite it.
                    if product_data.product_img == '':
                        product_data.product_img = (
                            self.get_hangul_url_convert(img_link))

        # Sold-out state.
        self.set_product_soldout_first(product_data, product_ctx)

        # Brand / name / prices.  Expected markup:
        #   <ul class="xans-product-listitem">
        #     <li class="manu">brand</li>
        #     <li class="second">product name</li>
        #     <li class="custom">25,000 WON</li>   (list price)
        #     <li class="price">15,800 WON</li>    (selling price)
        #   </ul>
        for li_ctx in product_ctx.find_all('li'):
            class_name_list = li_ctx.attrs.get('class')
            if not class_name_list:
                # No class, or an empty class list: nothing to dispatch on.
                continue

            first_class = class_name_list[0]
            value_str = li_ctx.get_text().strip()

            if first_class == 'manu':
                product_data.crw_brand1 = value_str
            elif first_class == 'second':
                product_data.crw_name = value_str
            elif first_class == 'custom':
                custom_price = int(__UTIL__.get_only_digit(value_str))
                # A zero list price is ignored and leaves crw_price as is.
                if custom_price != 0:
                    product_data.crw_price = custom_price
            elif first_class == 'price':
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(value_str))

        if crw_post_url != '':
            self.set_product_url_hash(product_data, crw_post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product tile of a listing page and register it.

    The category is read from the listing page (``soup``); image, sold-out
    state, name, link, goods code and price are read from ``product_ctx``.
    A product whose URL is not yet present in ``self.PRODUCT_URL_HASH`` is
    completed with ``self.set_product_data_sub`` and sent to
    ``self.process_product_api``.

    Args:
        page_url: Not used by this implementation.
        soup: Parsed listing page, used only to locate the category link.
        product_ctx: Tag of the single product tile to parse.

    Returns:
        Always ``True``. Any exception raised while parsing is logged
        through ``__LOG__.Error`` and swallowed, in which case the product
        is not registered.
    """
    try:
        crw_post_url = ''

        ####################################
        # Category
        ####################################
        category_list = soup.select(
            'body > table > tr > td > table > tr > td.outline_side > div.indiv > form > table> tr > td > b > a'
        )
        if not category_list:
            # Without a category link the product cannot be classified.
            # Skip it explicitly instead of failing later on an unbound
            # variable.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error('category link not found on listing page')
            return True

        # Only the first matching link is used.
        crw_category = category_list[0].get_text().strip()

        # Only continue for categories accepted by the ignore-list check.
        if not self.check_ignore_category_text(crw_category):
            return True

        product_data = ProductData()
        product_data.crw_category1 = crw_category

        ####################################
        # Product image
        ####################################
        first_img_ctx = product_ctx.find('img')
        if first_img_ctx is not None:
            img_src = ''
            # Prefer the lazy-load attribute over the plain src.
            if 'data-original' in first_img_ctx.attrs:
                img_src = first_img_ctx.attrs['data-original'].strip()
            elif 'src' in first_img_ctx.attrs:
                img_src = first_img_ctx.attrs['src'].strip()

            if img_src != '':
                tmp_img_link = self.BASIC_IMAGE_URL + '/shop' + img_src
                # Drop every '..' sequence from the assembled link.
                img_link = tmp_img_link.replace('..', '')
                product_data.product_img = self.get_hangul_url_convert(
                    img_link)

        ####################################
        # Sold-out state
        #   <img src="/shop/data/skin/freemart/img/icon/good_icon_soldout.gif">
        ####################################
        # find_all() is required here: find() returns a single Tag (or
        # None), so iterating its result never visits the tile's images.
        for icon_ctx in product_ctx.find_all('img'):
            icon_src = icon_ctx.attrs.get('src', '').strip()
            if 'soldout' in icon_src:
                product_data.crw_is_soldout = 1

        ####################################
        # Product name / link / goods code
        ####################################
        is_product_name = True
        is_product_link = True
        for product_link_ctx in product_ctx.find_all('a'):
            product_name = product_link_ctx.get_text().strip()

            # The name is the text of the first anchor that has any text.
            if is_product_name and product_name != '':
                product_data.crw_name = product_name
                is_product_name = False

            if not is_product_link or 'href' not in product_link_ctx.attrs:
                continue

            tmp_product_link = product_link_ctx.attrs['href'].strip()
            if 'javascript' in tmp_product_link:
                # javascript: pseudo links are not product pages.
                continue

            if not tmp_product_link.startswith('http'):
                # Relative link: prefix the shop's base product URL.
                tmp_product_link = '%s%s' % (
                    self.BASIC_PRODUCT_URL,
                    product_link_ctx.attrs['href'].strip())

            crw_post_url = tmp_product_link
            if self.C_PRODUCT_STRIP_STR != '':
                crw_post_url = tmp_product_link.replace(
                    self.C_PRODUCT_STRIP_STR, '')

            # Goods code is the value of the ``goodsno`` query argument.
            # A link without ``?goodsno=`` raises IndexError here, which
            # the handler below logs; the product is then not registered.
            split_list = crw_post_url.split('?goodsno=')
            sub_split_list = split_list[1].strip().split('&')
            product_data.crw_goods_code = sub_split_list[0].strip()
            is_product_link = False

        ####################################
        # Price
        ####################################
        # The price is the text of a <b> inside a <div>; when several divs
        # qualify, the last one wins.
        for div_ctx in product_ctx.find_all('div'):
            cost_ctx = div_ctx.find('b')
            if cost_ctx is not None:
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(cost_ctx.get_text().strip()))

        # Only URLs not yet present in PRODUCT_URL_HASH are processed.
        # NOTE(review): process_product_api is assumed to belong to the
        # same de-duplication branch as set_product_data_sub -- confirm.
        if (crw_post_url != ''
                and self.PRODUCT_URL_HASH.get(crw_post_url, -1) == -1):
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one product tile of a listing page and register it.

    Reads category, thumbnail, goods code, sold-out state, name, link and
    prices for the tile ``product_ctx``. When a product URL was found, the
    product is completed with ``self.set_product_data_sub`` and sent to
    ``self.process_product_api``.

    Args:
        page_url: URL of the listing page; passed to
            ``self.get_category_value``.
        soup: Parsed listing page; passed to ``self.get_category_value``.
        product_ctx: Tag of the single product tile to parse.

    Returns:
        Always ``True``. Any exception raised while parsing is logged
        through ``__LOG__.Error`` and swallowed, in which case the product
        is not registered.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        ####################################
        # Category
        ####################################
        self.reset_product_category(product_data)
        self.get_category_value(product_data, page_url, soup)

        ####################################
        # Thumbnail and goods code.  Expected markup:
        #
        #   <div class="prdImg scroll-fade">
        #     <a href="/product/.../57/category/61/display/1/"
        #        name="anchorBoxName_57">
        #       <img src="//host/web/product/medium/....jpg"
        #            class="thumb_Img" ...>
        #     </a>
        #   </div>
        ####################################
        img_div_ctx = product_ctx.find('div', class_='prdImg')
        if img_div_ctx is not None:
            thumb_ctx = img_div_ctx.find('img', class_='thumb_Img')
            if thumb_ctx is not None:
                img_src = thumb_ctx.attrs.get('src', '').strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

            # find() returns None when there is no anchor, so test for
            # None rather than comparing the tag with a string.
            anchor_ctx = img_div_ctx.find('a')
            if anchor_ctx is not None:
                anchor_name = anchor_ctx.attrs.get('name', '')
                if 'anchorBoxName_' in anchor_name:
                    # The goods code is what remains of the anchor name
                    # once the fixed prefix is removed.
                    product_data.crw_goods_code = anchor_name.replace(
                        'anchorBoxName_', '')

        ####################################
        # Sold-out state.  Expected markup:
        #
        #   <div class="promotion">
        #     <img src=".../ico_product_soldout.gif" class="icon_img"
        #          alt="품절">
        #   </div>
        ####################################
        if product_ctx.find_all('img', alt='품절'):
            product_data.crw_is_soldout = 1

        ####################################
        # Name and product link.  Expected markup:
        #
        #   <div class="description" ...>
        #     <strong class="name">
        #       <a href="/product/.../43/category/42/display/1/">
        #         <span ...>product name</span></a>
        #     </strong>
        #     ...
        #   </div>
        ####################################
        for name_div_ctx in product_ctx.find_all('div', class_='description'):
            for strong_ctx in name_div_ctx.find_all('strong'):
                # The link is the first anchor of the whole description
                # block; the name is the text of the <strong>.
                product_link_ctx = name_div_ctx.find('a')
                if (product_link_ctx is not None
                        and 'href' in product_link_ctx.attrs):
                    product_data.crw_name = strong_ctx.get_text().strip()
                    crw_post_url = self.get_crw_post_url(
                        product_link_ctx, 'href')

        ####################################
        # Prices.  Expected markup:
        #
        #   <li class="price_all">
        #     <span class="custom ">188,000원</span>
        #     <span class="price displaynone">
        #       <span class="strike">149,000원</span>
        #       <span class="pri">149,000원</span>
        #     </span>
        #     <span class="sale displaynone"></span>
        #   </li>
        ####################################
        for price_ctx in product_ctx.find_all('li', class_='price_all'):
            # Search inside the price item itself, not inside a tag left
            # over from the name loop above.
            for span_ctx in price_ctx.find_all('span'):
                class_name_list = span_ctx.attrs.get('class')
                if not class_name_list:
                    # No class, or an empty class list: nothing to match.
                    continue

                if class_name_list[0] == 'custom':
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            span_ctx.get_text().strip()))
                elif class_name_list[0] == 'pri':
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(
                            span_ctx.get_text().strip()))

        if crw_post_url != '':
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True