def set_product_data(self, page_url, soup, product_ctx):
    """Collect one listed product's fields and register it by URL.

    A new ProductData is filled through the category, image, sold-out and
    name/URL helpers, the prices are read from the first two <li> rows of
    product_ctx, and the result is handed to set_product_url_hash when
    the name/URL helper returned something other than ''.

    Any exception is logged through __LOG__ and swallowed.

    Returns:
        Always True.
    """
    try:
        product_data = ProductData()

        # Product category
        self.set_product_category_second(page_url, product_data, soup)

        # Product image
        self.set_product_image_second(product_data, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(product_data, product_ctx)

        # Product name / URL: try <p class="name">, then <strong class="name">.
        crw_post_url = self.set_product_name_url_fourth(
            product_data, product_ctx, 'p', 'name')
        if crw_post_url == '':
            crw_post_url = self.set_product_name_url_fourth(
                product_data, product_ctx, 'strong', 'name')

        # Price rows, sample markup:
        #   <li><strong class="title displaynone"><span></span> :</strong>
        #       <span>₩15,000</span></li>
        # Row 1 feeds crw_price and row 2 feeds crw_price_sale. Later rows
        # are never used, so they are no longer parsed: previously a later
        # row without ':' raised IndexError and the product was dropped.
        # A malformed row 1 or 2 still raises and is logged, as before.
        price_rows = product_ctx.find_all('li')[:2]
        for li_num, li_ctx in enumerate(price_rows, start=1):
            value_str = li_ctx.get_text().strip()
            # Keep the text after ':' and before any '(' annotation.
            price_str = value_str.split(':')[1].split('(')[0].strip()
            price = int(__UTIL__.get_only_digit(price_str))
            if li_num == 1:
                product_data.crw_price = price
            else:
                product_data.crw_price_sale = price

        if crw_post_url != '':
            self.set_product_url_hash(product_data, crw_post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image, name/URL and price come from the shared helpers;
    the sold-out flag is derived here from icon images. The item is
    passed to set_product_url_hash when the name/URL helper returned
    something other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_second(page_url, item, soup)

        # Product image
        self.set_product_image_second(item, product_ctx)

        # Sold-out status. Sample markup from the original notes:
        #   <div class="status">
        #     <div class="icon"><img src="/web/upload/custom_6.gif" alt=""></div>
        #   </div>
        # The item is flagged when any image inside the configured
        # sold-out element has 'custom_6.gif' in its src.
        status_boxes = product_ctx.find_all(
            self.C_PRODUCT_SOLDOUT_SELECTOR,
            class_=self.C_PRODUCT_SOLDOUT_SELECTOR_CLASSNAME)
        for status_box in status_boxes:
            for icon in status_box.find_all('img'):
                if 'src' not in icon.attrs:
                    continue
                if icon.attrs['src'].find('custom_6.gif') != -1:
                    item.crw_is_soldout = 1

        # Product name / URL: <p class="name"> first, then <strong class="name">.
        for tag_name in ('p', 'strong'):
            link = self.set_product_name_url_first(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price
        self.set_product_price_brand_first(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image, sold-out flag and name/URL come from the shared
    helpers; the sale price is scraped here from the <div rel="판매가">
    block. The item is passed to set_product_url_hash when the name/URL
    helper returned something other than ''. Exceptions are logged and
    swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_first(item, soup)

        # Product image, sample markup:
        #   <img src="//ai-doggi.com/web/product/medium/20191220/....jpg" alt="" class="thumb">
        self.set_product_image_second(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL, taken from a <div> with class "-name".
        link = self.set_product_name_url_second(
            item, product_ctx, 'div', '-name')

        # Price, sample markup:
        #   <div rel="판매가" class=" xans-record-">
        #     <span class="title displaynone"><span>판매가</span> :</span>
        #     <span>39,000원</span><span id="span_product_tax_type_text"> </span>
        #   </div>
        # Spans that are empty or carry the label / ':' are skipped; every
        # remaining span overwrites the sale price, so the last one wins.
        for price_box in product_ctx.find_all('div', {'rel': '판매가'}):
            for span in price_box.find_all('span'):
                text = span.get_text().strip()
                if (text == '' or text.find('판매가') >= 0
                        or text.find(':') >= 0):
                    continue
                item.crw_price_sale = int(__UTIL__.get_only_digit(text))

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image and name/URL come from the shared helpers; the
    sold-out flag and sale price are scraped here. The item is passed to
    set_product_url_hash when the name/URL helper returned something
    other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_second(page_url, item, soup)

        # Product image
        self.set_product_image_second(item, product_ctx)

        # Sold-out status: a product without the "add to cart" icon is
        # treated as sold out. Sample markup:
        #   <img src="/web/upload/icon_....png" alt="장바구니 담기"
        #        class="ec-admin-icon cart">
        if product_ctx.find('img', class_='ec-admin-icon cart') is None:
            item.crw_is_soldout = 1

        # Product name / URL: <p class="name"> first, then <strong class="name">.
        for tag_name in ('p', 'strong'):
            link = self.set_product_name_url_fourth(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price: <p class="prices"> containing <span class="price normal">.
        prices_box = product_ctx.find('p', class_='prices')
        if prices_box is not None:
            normal_price = prices_box.find('span', class_='price normal')
            if normal_price is not None:
                price_text = normal_price.get_text().strip()
                item.crw_price_sale = int(
                    __UTIL__.get_only_digit(price_text))

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    The first-level category is read from the page heading, the sold-out
    flag from the cart icon; image, name/URL and price come from the
    shared helpers. The item is passed to set_product_url_hash when the
    name/URL helper returned something other than ''. Exceptions are
    logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category: text of the first <h2> found under the
        # head-category title area of the page.
        title_areas = soup.select(
            '#-common > div > div > div.xans-element-.xans-product.xans-product-menupackage > div.xans-element-.xans-product.xans-product-headcategory.titleArea.sub'
        )
        for heading in (area.find('h2') for area in title_areas):
            if heading is not None:
                item.crw_category1 = heading.get_text().strip()
                break

        # Product image
        self.set_product_image_fourth(item, product_ctx)

        # Sold-out status: flagged when the cart icon is absent.
        if product_ctx.find('img', class_='ec-admin-icon cart') is None:
            item.crw_is_soldout = 1

        # Product name / URL
        link = self.set_product_name_url_first(
            item, product_ctx, 'div', 'name')

        # Price
        self.set_product_price_brand_first(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image and price come from the shared helpers. The product
    URL and goods code are read here from the element's <a> tags, and the
    name from its <div class="name"> elements. The item is passed to
    set_product_url_hash when a product URL was found. No sold-out check
    is performed. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        product_data = ProductData()
        crw_post_url = ''

        # Product category
        self.set_product_category_second(page_url, product_data, soup)

        # Product image
        self.set_product_image_second(product_data, product_ctx)

        # Product link and goods code; the last usable link wins.
        for link_ctx in product_ctx.find_all('a'):
            if 'href' not in link_ctx.attrs:
                continue

            href = link_ctx.attrs['href'].strip()
            if href.startswith('http'):
                link = href
            else:
                # Relative link: prefix it with the shop's base URL.
                link = '%s%s' % (self.BASIC_PRODUCT_URL, href)

            if self.C_PRODUCT_STRIP_STR != '':
                link = link.replace(self.C_PRODUCT_STRIP_STR, '')

            # Links without a product number are skipped. Previously
            # indexing [1] on such a link raised IndexError and the
            # whole product was dropped.
            parts = link.split('?product_no=')
            if len(parts) < 2:
                continue

            crw_post_url = link
            # Goods code is the product_no value up to the next '&'.
            product_data.crw_goods_code = (
                parts[1].strip().split('&')[0].strip())

        # Product name: the last <div class="name"> wins.
        for name_div_ctx in product_ctx.find_all('div', class_='name'):
            product_data.crw_name = name_div_ctx.get_text().strip()

        # Price
        self.set_product_price_brand_first(product_data, product_ctx)

        if crw_post_url != '':
            self.set_product_url_hash(product_data, crw_post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image, sold-out flag and name/URL come from the shared
    helpers; the sale price is read from the first
    <span class="price normal">. The item is passed to
    set_product_url_hash when the name/URL helper returned something
    other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_first(item, soup)

        # Product image
        self.set_product_image_third(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL
        link = self.set_product_name_url_fourth(
            item, product_ctx, 'div', 'name')

        # Price, sample markup:
        #   <p><span class="info displaynone"> / </span>
        #      <span class="price normal">19,900 won</span>
        #      <span class="price normal displaynone"></span></p>
        price_span = product_ctx.find('span', class_='price normal')
        if price_span is not None:
            price_text = price_span.get_text().strip()
            item.crw_price_sale = int(__UTIL__.get_only_digit(price_text))

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    All fields come from the shared helpers. After the category helper
    runs, the categories are shifted down one level when the first-level
    category has an entry in TOP_CATEGORY_NAME. The item is passed to
    set_product_url_hash when the name/URL helper returned something
    other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_second(page_url, item, soup)

        # If category1 is listed in TOP_CATEGORY_NAME, the mapped value
        # becomes category1 and the existing levels move down by one.
        top_name = self.TOP_CATEGORY_NAME.get(item.crw_category1, -1)
        if top_name != -1:
            item.crw_category3, item.crw_category2, item.crw_category1 = (
                item.crw_category2, item.crw_category1, top_name)

        # Product image
        self.set_product_image_second(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL: <strong class="name"> first, then <p class="name">.
        for tag_name in ('strong', 'p'):
            link = self.set_product_name_url_second(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price
        self.set_product_price_brand_second(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image, sold-out flag and name/URL come from the shared
    helpers; the sale price is scraped here from the <li> rows. The item
    is passed to set_product_url_hash when the name/URL helper returned
    something other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_first(item, soup)

        # Product image
        self.set_product_image_third(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL: <strong class="name"> first, then <p class="name">.
        for tag_name in ('strong', 'p'):
            link = self.set_product_name_url_second(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price: for every <li> holding more than one <span>, take the
        # first span's text up to any '('. The last matching row wins.
        for row in product_ctx.find_all('li'):
            spans = row.find_all('span')
            if len(spans) <= 1:
                continue
            first_text = spans[0].get_text().strip()
            price_text = first_text.split('(')[0].strip()
            item.crw_price_sale = int(__UTIL__.get_only_digit(price_text))

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image, name/URL and price come from the shared helpers;
    the sold-out flag is derived here from the "sold" box. The item is
    passed to set_product_url_hash when the name/URL helper returned
    something other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_second(page_url, item, soup)

        # Product image
        self.set_product_image_fourth(item, product_ctx)

        # Sold-out status: flagged when <div class="sold"> holds an image.
        #   <div class="sold"><img src="/web/upload/icon_....gif"
        #        class="icon_img" alt="" title=""></div>
        sold_box = product_ctx.find('div', class_='sold')
        if sold_box is not None and sold_box.find('img') is not None:
            item.crw_is_soldout = 1

        # Product name / URL: <strong class="name"> first, then <p class="name">.
        for tag_name in ('strong', 'p'):
            link = self.set_product_name_url_second(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price
        self.set_product_price_brand_second(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image, name/URL and price come from the shared helpers;
    the sold-out flag is derived from the product name. The item is
    passed to set_product_url_hash when the name/URL helper returned
    something other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_first(item, soup)

        # Product image
        self.set_product_image_fourth(item, product_ctx)

        # Product name / URL: <strong class="name"> first, then <p class="name">.
        for tag_name in ('strong', 'p'):
            link = self.set_product_name_url_first(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Sold-out status: this shop marks it inside the product name, e.g.
        #   <strong class="name"><a href="/product/detail.html?product_no=154...">
        #     ... <span>(품절) Hanbok Embroidery Cape - Navy</span></a></strong>
        if item.crw_name.find('(품절)') != -1:
            item.crw_is_soldout = 1

        # Price
        self.set_product_price_brand_first(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image, name/URL and price come from the shared helpers;
    the sold-out flag is derived from markers in the product name. The
    item is passed to set_product_url_hash when the name/URL helper
    returned something other than ''. Exceptions are logged and
    swallowed.

    Returns:
        Always True.
    """
    try:
        product_data = ProductData()

        # Product category
        self.set_product_category_second(page_url, product_data, soup)

        # Product image
        self.set_product_image_second(product_data, product_ctx)

        # Product name / URL: <strong class="name"> first, then <p class="name">.
        crw_post_url = self.set_product_name_url_second(
            product_data, product_ctx, 'strong', 'name')
        if crw_post_url == '':
            crw_post_url = self.set_product_name_url_second(
                product_data, product_ctx, 'p', 'name')

        # Sold-out status: the name carries a '[품절]' or '-품절-' marker.
        # str.find returns 0 for a match at the very start, so the test
        # must be `0 <=`. The former `0 <` missed names that begin with
        # the marker.
        name = product_data.crw_name
        if 0 <= name.find('[품절]') or 0 <= name.find('-품절-'):
            product_data.crw_is_soldout = 1

        # Price
        self.set_product_price_brand_second(product_data, product_ctx)

        if crw_post_url != '':
            self.set_product_url_hash(product_data, crw_post_url)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image, sold-out flag and name/URL come from the shared
    helpers; the sale price is read from the first <p class="price">.
    The item is passed to set_product_url_hash when the name/URL helper
    returned something other than ''. Exceptions are logged and
    swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_first(item, soup)

        # Product image
        self.set_product_image_second(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL: <p class="name"> first, then <strong class="name">.
        for tag_name in ('p', 'strong'):
            link = self.set_product_name_url_fourth(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price, sample markup: <p class="price">KRW 46,000</p>
        price_tag = product_ctx.find('p', class_='price')
        if price_tag is not None:
            price_text = price_tag.get_text().strip()
            item.crw_price_sale = int(__UTIL__.get_only_digit(price_text))

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Up to three category levels are taken from the '|'-separated
    PAGE_URL_HASH entry for page_url; image, sold-out flag, name/URL and
    price come from the shared helpers. The item is passed to
    set_product_url_hash when the name/URL helper returned something
    other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category: first three '|' parts map to levels 1-3;
        # any further parts are ignored.
        category_fields = ('crw_category1', 'crw_category2', 'crw_category3')
        category_parts = self.PAGE_URL_HASH[page_url].split('|')
        for field_name, part in zip(category_fields, category_parts):
            setattr(item, field_name, part)

        # Product image
        self.set_product_image_fourth(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL
        link = self.set_product_name_url_second(
            item, product_ctx, 'strong', 'name')

        # Price
        self.set_product_price_brand_first(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image, sold-out flag and name/URL come from the shared
    helpers; the sale price is read from the first
    <strong class="price">. The item is passed to set_product_url_hash
    when the name/URL helper returned something other than ''.
    Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_third(item, soup)

        # Product image
        self.set_product_image_fourth(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL, sample markup:
        #   <p class="name"><a href="/product/detail.html?product_no=286&cate_no=43&display_group=1">
        #     <strong class="title displaynone">...</strong>
        #     <span>Frill Neck Sleeve Blouse Lavender [20%SALE]</span></a></p>
        # <strong class="name"> is tried first, then <p class="name">.
        for tag_name in ('strong', 'p'):
            link = self.set_product_name_url_second(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price, sample markup:
        #   <p><strike class="displaynone"></strike><strong class="price">25,000원</strong></p>
        price_tag = product_ctx.find('strong', class_='price')
        if price_tag is not None:
            price_text = price_tag.get_text().strip()
            item.crw_price_sale = int(__UTIL__.get_only_digit(price_text))

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Category, image, sold-out flag and name/URL come from the shared
    helpers; the sale price is read from the first <p class="price">.
    The item is passed to set_product_url_hash when the name/URL helper
    returned something other than ''. Exceptions are logged and
    swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_third(item, soup)

        # Product image, sample markup:
        #   <a name="anchorBoxName_741" href="/product/detail.html?product_no=741&cate_no=42&display_group=1"
        #      class="prdImg"><img src="//babiana.co.kr/web/product/medium/201907/....jpg"
        #      class="borderEffect"></a>
        self.set_product_image_first(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL, sample markup:
        #   <p class="name"><a href="/product/detail.html?product_no=286&cate_no=43&display_group=1">
        #     <strong class="title displaynone">...</strong>
        #     <span>Frill Neck Sleeve Blouse Lavender [20%SALE]</span></a></p>
        link = self.set_product_name_url_first(
            item, product_ctx, 'p', 'name')

        # Price
        price_tag = product_ctx.find('p', class_='price')
        if price_tag is not None:
            price_text = price_tag.get_text().strip()
            item.crw_price_sale = int(__UTIL__.get_only_digit(price_text))

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    The first-level category is the PAGE_URL_HASH entry for page_url;
    image, sold-out flag, name/URL and price come from the shared
    helpers. The item is passed to set_product_url_hash when the
    name/URL helper returned something other than ''. Exceptions are
    logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        item.crw_category1 = self.PAGE_URL_HASH[page_url]

        # Product image
        self.set_product_image_second(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL: <p class="name"> first, then <strong class="name">.
        for tag_name in ('p', 'strong'):
            link = self.set_product_name_url_first(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price
        self.set_product_price_brand_first(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Every field is delegated to a shared helper. The item is passed to
    set_product_url_hash unless the name/URL helper returned ''.
    Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_first(item, soup)

        # Product image
        self.set_product_image_fourth(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL, taken from <div class="description">.
        link = self.set_product_name_url_second(
            item, product_ctx, 'div', 'description')

        # Price
        self.set_product_price_brand_first(item, product_ctx)

        # Nothing to register without a product URL.
        if link == '':
            return True
        self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    The category helper runs first; then the second and third category
    levels are overwritten from the '|'-separated PAGE_URL_HASH entry
    for page_url. Image, sold-out flag, name/URL and price come from the
    shared helpers. The item is passed to set_product_url_hash when the
    name/URL helper returned something other than ''. Exceptions are
    logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_third(item, soup)

        # First '|' part -> category2, second -> category3; the rest
        # is ignored.
        sub_fields = ('crw_category2', 'crw_category3')
        hash_parts = self.PAGE_URL_HASH[page_url].split('|')
        for field_name, part in zip(sub_fields, hash_parts):
            setattr(item, field_name, part.strip())

        # Product image
        self.set_product_image_fourth(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL
        link = self.set_product_name_url_second(
            item, product_ctx, 'div', 'description')

        # Price
        self.set_product_price_brand_second(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Every field is delegated to a shared helper. The item is passed to
    set_product_url_hash when the name/URL helper returned something
    other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_second(page_url, item, soup)

        # Product image, sample markup:
        #   <div class="thumbnail">
        #     <a href="/product/detail.html?product_no=42&cate_no=70&display_group=1" name="anchorBoxName_42">
        #       <img src="//cocotail.co.kr/web/product/medium/201909/....jpg" id="eListPrdImage42_1" alt=""></a>
        #     <span class="wish"><img src="//img.echosting.cafe24.com/.../btn_wish_before.png" ...></span>
        #   </div>
        self.set_product_image_fourth(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL, sample markup:
        #   <p class="name">
        #     <a href="/product/detail.html?product_no=81&cate_no=56&display_group=1">
        #       <span><b>LUXURY SOFA</b><br><span>블랙</span></span></a> ...
        #   </p>
        # <strong class="name"> is tried first, then <p class="name">.
        for tag_name in ('strong', 'p'):
            link = self.set_product_name_url_first(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price
        self.set_product_price_brand_first(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Every field is delegated to a shared helper. The item is passed to
    set_product_url_hash when the name/URL helper returned something
    other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_first(item, soup)

        # Product image, sample markup:
        #   <img src="//ai-doggi.com/web/product/medium/20191220/....jpg" alt="" class="thumb">
        self.set_product_image_second(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL, sample markup:
        #   <p class="name"><a href="/product/detail.html?product_no=286&cate_no=43&display_group=1">
        #     <strong class="title displaynone">...</strong>
        #     <span>Frill Neck Sleeve Blouse Lavender [20%SALE]</span></a></p>
        # <p class="name"> is tried first, then <strong class="name">.
        for tag_name in ('p', 'strong'):
            link = self.set_product_name_url_first(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price
        self.set_product_price_brand_first(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Every field is delegated to a shared helper. The item is passed to
    set_product_url_hash when the name/URL helper returned something
    other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_second(page_url, item, soup)

        # Product image, sample markup:
        #   <a href="/product/detail.html?product_no=142&cate_no=55&display_group=1" name="anchorBoxName_142">
        #     <img src="//cokemill.com/web/product/medium/201810/....jpg" alt="" class="thumb"></a>
        self.set_product_image_fourth(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL, sample markup:
        #   <p class="name">
        #     <a href="/product/detail.html?product_no=144&cate_no=55&display_group=1">
        #       <span>HOT!!!!시나몬라떼 니트조끼(카멜)</span></a><br>
        #     <span class="zoom"><img src=".../btn_prd_zoom.gif" ...></span>
        #   </p>
        # <p class="name"> is tried first, then <strong class="name">.
        for tag_name in ('p', 'strong'):
            link = self.set_product_name_url_third(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price
        self.set_product_price_brand_second(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Every field is delegated to a shared helper. The item is passed to
    set_product_url_hash when the name/URL helper returned something
    other than ''. Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_first(item, soup)

        # Product image, sample markup:
        #   <div class="thumbnail">
        #     <a href="/product/detail.html?product_no=335&cate_no=24&display_group=1" name="anchorBoxName_335">
        #       <img src="//cjgsfood1.cafe24.com/web/product/medium/201906/....jpg"
        #            id="eListPrdImage335_1" alt="돼지꼬리뼈 100g"></a>
        #     <span class="wish"></span>
        #   </div>
        self.set_product_image_fourth(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL, sample markup:
        #   <strong class="name"><a href="/product/detail.html?product_no=335&cate_no=24&display_group=1">
        #     <span class="title displaynone">...</span> <span>돼지꼬리뼈 100g</span></a></strong>
        # <strong class="name"> is tried first, then <p class="name">.
        for tag_name in ('strong', 'p'):
            link = self.set_product_name_url_first(
                item, product_ctx, tag_name, 'name')
            if link != '':
                break

        # Price
        self.set_product_price_brand_second(item, product_ctx)

        if link != '':
            self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Fill a ProductData from one product element and register it.

    Every field is delegated to a shared helper. The item is passed to
    set_product_url_hash unless the name/URL helper returned ''.
    Exceptions are logged and swallowed.

    Returns:
        Always True.
    """
    try:
        item = ProductData()

        # Product category
        self.set_product_category_first(item, soup)

        # Product image
        self.set_product_image_third(item, product_ctx)

        # Sold-out status
        self.set_product_soldout_first(item, product_ctx)

        # Product name / URL, taken from <div class="prd_name">.
        link = self.set_product_name_url_fifth(
            item, product_ctx, 'div', 'prd_name')

        # Price
        self.set_product_price_brand_first(item, product_ctx)

        # Nothing to register without a product URL.
        if link == '':
            return True
        self.set_product_url_hash(item, link)

    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the fields of one listed product and register its URL.

    This method is adapted per site. Per the original note, ``product_ctx``
    is the product's ``<li>`` element. Sample anchor markup
    ("상품명" is the "product name" label):

        <a href="/product/detail.html?product_no=471&cate_no=69&display_group=1" class="">
        <span class="title displaynone"><span style="font-size:12px;color:#555555;">상품명</span> :</span> <span style="font-size:12px;color:#555555;">러버스 보호자용 반팔 티셔츠</span></a>

    Sample listing URL: http://pet-paradise.kr/category/%EC%99%B8%EC%B6%9C/6040/?page=5
    Sample category meta tag: <meta property="og:description" content="SIDE BY SIDE" />

    Args:
        page_url: URL of the listing page, forwarded to the category helper.
        soup: Parsed document of the whole page.
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``; any exception is logged and swallowed.
    """
    try:
        product = ProductData()

        # Product category.
        # Sample: <meta property="og:description" content="SIDE BY SIDE" />
        # NOTE(review): both category helpers are invoked, in this order;
        # confirm that running the first one is still intended.
        self.set_product_category_first(product, soup)
        self.set_product_category_second(page_url, product, soup)

        # Product image.
        self.set_product_image_third(product, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(product, product_ctx)

        # Product link and product name.
        post_url = self.set_product_name_url_first(
            product, product_ctx, 'div', 'df-prl-desc')

        # Price / brand.
        self.set_product_price_brand_first(product, product_ctx)

        # Only products with a resolved URL are registered.
        if post_url != '':
            self.set_product_url_hash(product, post_url)
    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the fields of one listed product and register its URL.

    This method is adapted per site. Per the original note, ``product_ctx``
    is the product's ``<li>`` element. Sample anchor markup
    ("상품명" is the "product name" label):

        <a href="/product/스누피-3way-유모차-6kg/1696/category/6040/display/1/" class=""><span class="title displaynone"><span style="font-size:12px;color:#555555;font-weight:bold;">상품명</span> :</span> <span style="font-size:12px;color:#555555;font-weight:bold;">스누피 3WAY 유모차 (6kg)</span></a>

    Sample listing URL: http://pet-paradise.kr/category/%EC%99%B8%EC%B6%9C/6040/?page=5

    Args:
        page_url: URL of the listing page, forwarded to the category helper.
        soup: Parsed document of the whole page.
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``; any exception is logged and swallowed.
    """
    try:
        product = ProductData()

        # Product category.
        self.set_product_category_second(page_url, product, soup)

        # Product image.
        self.set_product_image_fourth(product, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(product, product_ctx)

        # Product link and product name.
        post_url = self.set_product_name_url_second(
            product, product_ctx, 'div', 'description')

        # Price / brand.
        self.set_product_price_brand_first(product, product_ctx)

        # Only products with a resolved URL are registered.
        if post_url != '':
            self.set_product_url_hash(product, post_url)
    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the fields of one listed product and register its URL.

    Every field is filled in by a ``self.set_product_*`` helper defined
    outside this block; the name/URL is looked up under a ``div`` with
    class ``description``.

    Sample category meta tags noted for this site:
        <meta property="og:description" content="SIDE BY SIDE" />
        <meta property="og:description" content="생활 • 식품">

    Args:
        page_url: URL of the listing page, forwarded to the category helper.
        soup: Parsed document of the whole page.
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``; any exception is logged and swallowed.
    """
    try:
        product = ProductData()

        # Product category.
        self.set_product_category_second(page_url, product, soup)

        # Product image.
        self.set_product_image_fourth(product, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(product, product_ctx)

        # Product link and product name.
        post_url = self.set_product_name_url_second(
            product, product_ctx, 'div', 'description')

        # Price / brand.
        self.set_product_price_brand_first(product, product_ctx)

        # Only products with a resolved URL are registered.
        if post_url != '':
            self.set_product_url_hash(product, post_url)
    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Parse one listed product inline and submit it if its URL is new.

    Unlike the helper-based variants, this one extracts every field itself:
    category (from ``soup``), image, sold-out flag, name, link, goods code
    and price (from ``product_ctx``). A product whose URL is not yet a key
    of ``self.PRODUCT_URL_HASH`` is passed to ``self.set_product_data_sub``
    and ``self.process_product_api``.

    Args:
        page_url: URL of the listing page; not referenced by this variant.
        soup: Parsed document of the whole page (used for the category).
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``; any exception is logged and swallowed, in which
        case the product is not submitted.
    """
    try:
        crw_post_url = ''

        ####################################
        # Extract the product category
        ####################################
        category_list = soup.select(
            'body > table > tr > td > table > tr > td.outline_side > div.indiv > form > table> tr > td > b > a'
        )
        # Only the first match is used. With no match this raises
        # IndexError, which the handler below logs before the entry is
        # skipped.
        crw_category = category_list[0].get_text().strip()

        # Valid-category check: continue only when the check is truthy.
        if not self.check_ignore_category_text(crw_category):
            return True

        product_data = ProductData()
        product_data.crw_category1 = crw_category

        ####################################
        # Product image
        ####################################
        img_ctx = product_ctx.find('img')
        if img_ctx is not None:
            # 'data-original' takes precedence over 'src'.
            img_src = ''
            if 'data-original' in img_ctx.attrs:
                img_src = img_ctx.attrs['data-original'].strip()
            elif 'src' in img_ctx.attrs:
                img_src = img_ctx.attrs['src'].strip()

            if img_src != '':
                tmp_img_link = self.BASIC_IMAGE_URL + '/shop' + img_src
                img_link = tmp_img_link.replace('..', '')
                product_data.product_img = self.get_hangul_url_convert(
                    img_link)

        ####################################
        # Sold-out flag
        # <img src="/shop/data/skin/freemart/img/icon/good_icon_soldout.gif">
        ####################################
        # find_all() is required here: find() returns a single tag (or
        # None), so looping over its result never visited the <img>
        # elements and raised TypeError for entries without any image.
        for img_ctx in product_ctx.find_all('img'):
            if 'soldout' in img_ctx.attrs.get('src', ''):
                product_data.crw_is_soldout = 1

        ####################################
        # Product link, product name and goods code
        ####################################
        is_product_name = True
        is_product_link = True
        for product_link_ctx in product_ctx.find_all('a'):
            # The name is the text of the first anchor with non-empty text.
            product_name = product_link_ctx.get_text().strip()
            if is_product_name and product_name != '':
                product_data.crw_name = product_name
                is_product_name = False

            if not is_product_link or 'href' not in product_link_ctx.attrs:
                continue

            tmp_product_link = product_link_ctx.attrs['href'].strip()
            if 'javascript' in tmp_product_link:
                continue

            # Links that do not start with 'http' get the site prefix.
            if not tmp_product_link.startswith('http'):
                tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                             tmp_product_link)

            crw_post_url = tmp_product_link
            if self.C_PRODUCT_STRIP_STR != '':
                crw_post_url = tmp_product_link.replace(
                    self.C_PRODUCT_STRIP_STR, '')

            # The goods code is the value following '?goodsno=' up to the
            # next '&'. A link without that marker raises IndexError, which
            # is logged below and skips the entry.
            split_list = crw_post_url.split('?goodsno=')
            sub_split_list = split_list[1].strip().split('&')
            product_data.crw_goods_code = sub_split_list[0].strip()
            is_product_link = False

        ####################################
        # Price: digits of the <b> element inside each <div>; the last
        # such <div> wins.
        ####################################
        for div_ctx in product_ctx.find_all('div'):
            cost_ctx = div_ctx.find('b')
            if cost_ctx is not None:
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(cost_ctx.get_text().strip()))

        # Submit only products whose URL is not yet in PRODUCT_URL_HASH.
        if crw_post_url != '' and self.PRODUCT_URL_HASH.get(
                crw_post_url, -1) == -1:
            self.set_product_data_sub(product_data, crw_post_url)
            self.process_product_api(product_data)
    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the fields of one listed product and register its URL.

    Category, image and sold-out flag come from ``self.set_product_*``
    helpers defined outside this block. Name, link, goods code, prices and
    brand are parsed here from ``product_ctx``.

    Args:
        page_url: URL of the listing page, forwarded to the category helper.
        soup: Parsed document of the whole page.
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``; any exception is logged and swallowed.
    """
    try:
        product = ProductData()
        post_url = ''

        # Product category.
        self.set_product_category_second(page_url, product, soup)

        # Product image.
        self.set_product_image_first(product, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(product, product_ctx)

        # Name, link and goods code from the anchors inside each
        # <div class="sp-product__title">.
        for title_div in product_ctx.find_all('div',
                                              class_='sp-product__title'):
            for anchor in title_div.find_all('a'):
                if 'href' not in anchor.attrs:
                    continue

                # Any span whose text starts with neither the "상품명"
                # ("product name") label nor ':' becomes the name; the
                # last such span wins.
                for span in anchor.find_all('span'):
                    text = span.get_text().strip()
                    if not (text.startswith('상품명') or text.startswith(':')):
                        product.crw_name = text

                # Links that do not start with 'http' get the site prefix.
                href = anchor.attrs['href'].strip()
                if href.startswith('http'):
                    link = href
                else:
                    link = '%s%s' % (self.BASIC_PRODUCT_URL, href)

                post_url = link
                if self.C_PRODUCT_STRIP_STR != '':
                    post_url = link.replace(self.C_PRODUCT_STRIP_STR, '')

                # Path segments 4 and 5 of the URL serve as the fallback
                # name and the goods code.
                parts = post_url.split('/')
                if product.crw_name == '':
                    product.crw_name = parts[4].strip()
                product.crw_goods_code = parts[5].strip()

        # Prices and brand: each <div> carrying a 'rel' attribute names its
        # field there ("판매가" = selling price, "할인가" = discount price,
        # "브랜드" = brand). Spans that merely repeat a label, or are empty,
        # are skipped.
        label_texts = ('브랜드', '판매가', '할인가', '')
        for field_div in product_ctx.find_all('div'):
            if 'rel' not in field_div.attrs:
                continue
            field_title = field_div.attrs['rel']
            for span in field_div.find_all('span'):
                value = span.get_text().strip()
                if value in label_texts:
                    continue
                if field_title == '판매가':
                    product.crw_price = int(__UTIL__.get_only_digit(value))
                if field_title == '할인가':
                    product.crw_price_sale = int(
                        __UTIL__.get_only_digit(value))
                if field_title == '브랜드':
                    product.crw_brand1 = value

        # Only products with a resolved URL are registered.
        if post_url != '':
            self.set_product_url_hash(product, post_url)
    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True
def set_product_data(self, page_url, soup, product_ctx):
    """Collect the fields of one listed product and register its URL.

    Category, image, sold-out flag and name/URL come from
    ``self.set_product_*`` helpers defined outside this block. Prices are
    parsed here from the ``<li class="price_all">`` element.

    Args:
        page_url: URL of the listing page; not referenced by this variant.
        soup: Parsed document of the whole page (used for the category).
        product_ctx: Parsed element holding a single product entry.

    Returns:
        Always ``True``; any exception is logged and swallowed.
    """
    try:
        product = ProductData()

        # Product category.
        self.set_product_category_first(product, soup)

        # Product image.
        self.set_product_image_second(product, product_ctx)

        # Sold-out status.
        self.set_product_soldout_first(product, product_ctx)

        # Product name / URL: try <strong class="name"> first and fall back
        # to <p class="name"> only when the first attempt yields an empty
        # URL.
        for tag_name in ('strong', 'p'):
            post_url = self.set_product_name_url_second(
                product, product_ctx, tag_name, 'name')
            if post_url != '':
                break

        # Price: span class -> product attribute, applied in this order so
        # that later matches overwrite earlier ones.
        # NOTE(review): 'strike' also writes crw_price_sale, overriding any
        # 'pri' value - confirm this is intended.
        price_box = product_ctx.find('li', class_='price_all')
        if price_box is not None:
            span_targets = (
                ('custom', 'crw_price'),
                ('pri', 'crw_price_sale'),
                ('strike', 'crw_price_sale'),
            )
            for css_class, field_name in span_targets:
                for span in price_box.find_all('span', class_=css_class):
                    digits = __UTIL__.get_only_digit(span.get_text().strip())
                    setattr(product, field_name, int(digits))

        # Only products with a resolved URL are registered.
        if post_url != '':
            self.set_product_url_hash(product, post_url)
    except Exception as ex:
        __LOG__.Error('에러 : set_product_data')
        __LOG__.Error(ex)

    return True