Exemple #1
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Extract one product from a listing entry and submit it.

        Reads the thumbnail image, detail link, name, brand, prices and
        sold-out state out of ``product_ctx`` into a new ``ProductData``.
        When a detail URL was found, the record is passed to
        ``set_product_data_sub`` and ``process_product_api``.

        Args:
            page_url: Listing page URL; not used by this method.
            soup: Parsed listing page; not used by this method.
            product_ctx: Element for a single product entry; must expose
                ``find`` / ``find_all`` (presumably a BeautifulSoup Tag).

        Returns:
            Always ``True``, including when an exception was caught and
            logged.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            ####################################
            # Product image, detail link, goods code and category.
            #
            # <dt class="thumb"><a href="/shop/shopdetail.html?branduid=111062&amp;xcode=020&amp;..."><img class="MS_prod_img_m" src="/shopimages/affetto/0200040000052.jpeg?1563325993"></a></dt>
            ####################################
            for thumb_ctx in product_ctx.find_all('dt', class_='thumb'):
                for img_ctx in thumb_ctx.find_all('img'):
                    # Keep only the path part of src (drop the query string).
                    raw_src = img_ctx.attrs.get('src', '')
                    img_src = raw_src.strip().split('?')[0].strip()
                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)
                        # The first usable image of this thumbnail is enough.
                        break

                product_link_ctx = thumb_ctx.find('a')
                if (product_link_ctx is not None
                        and 'href' in product_link_ctx.attrs):
                    crw_post_url = self.get_crw_post_url(
                        product_link_ctx, 'href')
                    if crw_post_url != '':
                        self.get_crw_goods_code(product_data, crw_post_url)
                        self.get_category_value(product_data, crw_post_url)

            ####################################
            # Product name and brand.
            #
            # <li class="prd-name"><a href="/shop/shopdetail.html?...">이글루 하우스 - M / L</a></li>
            ####################################
            for info_ctx in product_ctx.find_all('dd', class_='prd-info'):
                name_ctx = info_ctx.find('li', class_='prd-name')
                if name_ctx is not None:
                    product_data.crw_name = name_ctx.get_text().strip()

                brand_ctx = info_ctx.find('li', class_='prd-brand')
                if brand_ctx is not None:
                    product_data.crw_brand1 = brand_ctx.get_text().strip()

            ####################################
            # Price / sold-out state.
            #
            # <li class="prd-price">
            # <strike>95,000원</strike>
            # <span class="price">85,500원</span>
            # </li>
            #
            # ---- when sold out ----
            # <li class="prd-price">
            # Sold Out
            # </li>
            ####################################
            for price_ctx in product_ctx.find_all('li', class_='prd-price'):
                sell_ctx = price_ctx.find('span', class_='price')
                consumer_ctx = price_ctx.find('strike')

                # Substring test so that a marker at the very start of the
                # text is detected as well (find() > 0 skipped index 0).
                if 'Out' in price_ctx.get_text().strip():
                    product_data.crw_is_soldout = 1

                if consumer_ctx is not None:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            consumer_ctx.get_text().strip()))

                if sell_ctx is not None:
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

            # Only entries with a resolvable detail URL are submitted.
            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            # Best effort: one broken entry must not stop the crawl.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #2
0
    def set_product_data(self, page_url, soup, img_ctx, name_ctx, price_ctx):
        """Extract one product from separate image/name/price elements.

        Builds a new ``ProductData`` from the three elements and, when a
        detail URL was found in the name link, passes the record to
        ``set_product_data_sub`` and ``process_product_api``.

        Args:
            page_url: Listing page URL; not used by this method.
            soup: Parsed listing page; not used by this method.
            img_ctx: Element containing the product ``<img>``.
            name_ctx: Element holding the product name and its ``<a>`` link.
            price_ctx: Element holding the price / sold-out markup.

        Returns:
            Always ``True``, including when an exception was caught and
            logged.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            ####################################
            # Product image.
            #
            # <a href="/shop/shopdetail.html?branduid=624477&amp;..."><img class="MS_prod_img_s" src="/shopimages/lovespet/0320020000533.gif?1590117644" alt=""></a>
            ####################################
            # A separate loop name keeps the img_ctx parameter intact.
            for img_tag in img_ctx.find_all('img'):
                # Keep only the path part of src (drop the query string).
                raw_src = img_tag.attrs.get('src', '')
                img_src = raw_src.strip().split('?')[0].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)
                    # The first usable image is enough.
                    break

            ####################################
            # Product name, detail link, goods code and category.
            #
            # <strong class="name"><a href="/shop/shopdetail.html?branduid=624477&amp;...">도기스타 쿨하네스 ( S ~ XL )</a></strong>
            ####################################
            product_data.crw_name = name_ctx.get_text().strip()
            product_link_ctx = name_ctx.find('a')
            if (product_link_ctx is not None
                    and 'href' in product_link_ctx.attrs):
                crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
                if crw_post_url != '':
                    self.get_crw_goods_code(product_data, crw_post_url)
                    self.get_category_value(product_data, crw_post_url)

            ####################################
            # Price / sold-out state.
            #
            # <li class="price">
            # <p class="price02"><strike>₩24,000</strike></p>
            # <p class="price03">₩24,000</p>
            # </li>
            #
            # ---- when sold out ----
            # <li class="price">
            # <div class="sold">[품절상품]</div>
            # </li>
            ####################################
            sell_ctx = price_ctx.find('p', class_='price03')
            consumer_ctx = price_ctx.find('p', class_='price02')

            if price_ctx.find('div', class_='sold') is not None:
                product_data.crw_is_soldout = 1

            if consumer_ctx is not None:
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(consumer_ctx.get_text().strip()))

            if sell_ctx is not None:
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

            # Only entries with a resolvable detail URL are submitted.
            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            # Best effort: one broken entry must not stop the crawl.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #3
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Extract one product from a listing entry and submit it.

        Reads the thumbnail image, detail link, name, bracketed brand
        prefix, sale price and sold-out state out of ``product_ctx`` into a
        new ``ProductData``. When a detail URL was found, the record is
        passed to ``set_product_data_sub`` and ``process_product_api``.

        Args:
            page_url: Listing page URL; not used by this method.
            soup: Parsed listing page; not used by this method.
            product_ctx: Element for a single product entry; must expose
                ``find_all`` (presumably a BeautifulSoup Tag).

        Returns:
            Always ``True``, including when an exception was caught and
            logged.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            ####################################
            # Product image, detail link, goods code and category.
            #
            # <dt class="thumb"><a href="/shop/shopdetail.html?branduid=803964&amp;..."><img class="MS_prod_img_s" src="/shopimages/aromnaom/0330290001733.jpg?1581494094"></a></dt>
            ####################################
            for thumb_ctx in product_ctx.find_all('dt', class_='thumb'):
                product_link_list = thumb_ctx.find_all('a')

                for img_ctx in thumb_ctx.find_all('img'):
                    # Keep only the path part of src (drop the query string).
                    raw_src = img_ctx.attrs.get('src', '')
                    img_src = raw_src.strip().split('?')[0].strip()
                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)
                        # The first usable image of this thumbnail is enough.
                        break

                # Use the first link that yields a non-empty detail URL.
                for product_link_ctx in product_link_list:
                    if 'href' not in product_link_ctx.attrs:
                        continue
                    crw_post_url = self.get_crw_post_url(
                        product_link_ctx, 'href')
                    if crw_post_url != '':
                        self.get_crw_goods_code(product_data, crw_post_url)
                        self.get_category_value(product_data, crw_post_url)
                        break

            ####################################
            # Product name and brand.
            #
            # <li class="prd-name">[애니케어] 면역을 위한 멀티파우더 <span class="MK-product-icons"></span></li>
            ####################################
            for name_ctx in product_ctx.find_all('li', class_='prd-name'):
                product_data.crw_name = name_ctx.get_text().strip()
                # The brand is written in brackets in front of the name,
                # e.g. "[스텔라&츄이] 츄이스 치킨 디너패티".
                if product_data.crw_name.startswith('['):
                    bracket_part = product_data.crw_name.split(']')[0]
                    product_data.crw_brand1 = bracket_part[1:].strip()

            ####################################
            # Price / sold-out state.
            #
            # <li class="prd-price">74,800원</li>
            #
            # ---- when sold out ----
            # <li class="prd-price">
            # <span class="fc-red">품절</span>
            # </li>
            ####################################
            for list_ctx in product_ctx.find_all('ul'):
                sell_ctx = list_ctx.find('li', class_='prd-price')
                if sell_ctx is None:
                    continue

                # NOTE(review): in the sold-out markup the price text has no
                # digits; if get_only_digit returns '' for that, int() raises
                # here and the whole entry is skipped -- confirm.
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

                if list_ctx.find('span', class_='fc-red') is not None:
                    product_data.crw_is_soldout = 1

            # Only entries with a resolvable detail URL are submitted.
            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            # Best effort: one broken entry must not stop the crawl.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #4
0
    def set_product_data_second(self, page_url, soup):
        """Extract product details from a product detail page.

        Derives the goods code from the ``product_no`` query parameter of
        ``page_url``, takes the categories from ``self.PAGE_URL_HASH``, and
        reads image, name, sold-out state and prices from ``soup``. The
        resulting ``ProductData`` is passed to ``set_product_url_hash``.

        Args:
            page_url: Detail page URL; must contain ``?product_no=`` and be
                a key of ``self.PAGE_URL_HASH``.
            soup: Parsed detail page exposing ``find`` / ``find_all``
                (presumably a BeautifulSoup document).

        Returns:
            Always ``True``, including when an exception was caught and
            logged.
        """
        try:
            product_data = ProductData()

            crw_post_url = page_url
            # Goods code: the product_no value up to the next '&'.
            goods_part = crw_post_url.split('?product_no=')[1].strip()
            product_data.crw_goods_code = goods_part.split('&')[0].strip()

            # Categories: level 1 is fixed, levels 2 and 3 come from the
            # '|'-separated value stored for this page URL.
            product_data.crw_category1 = 'PRODUCT'
            category_parts = self.PAGE_URL_HASH[page_url].split('|')
            product_data.crw_category2 = category_parts[0].strip()
            if len(category_parts) > 1:
                product_data.crw_category3 = category_parts[1].strip()

            # Product image: only an empty product_img is filled in.
            for img_ctx in soup.find_all('img', class_='BigImage'):
                img_src = img_ctx.attrs.get('src', '').strip()
                if img_src == '':
                    continue
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                if product_data.product_img == '':
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

            # Product name: only an empty crw_name is filled in.
            for name_ctx in soup.find_all('p', class_='prd_model'):
                if product_data.crw_name == '':
                    product_data.crw_name = name_ctx.get_text().strip()

            # Sold-out state: the product counts as sold out when the
            # #btnReserve element is present and not hidden.
            sold_out_ctx = soup.find('span', {'id': 'btnReserve'})
            if sold_out_ctx is not None:
                # Membership test instead of indexing [0]: safe for an empty
                # class list and for 'displaynone' not being the first class.
                if 'displaynone' not in sold_out_ctx.attrs.get('class', []):
                    product_data.crw_is_soldout = 1

            # Prices.
            for price_ctx in soup.find_all('div', class_='info_price'):
                sell_ctx = price_ctx.find('span', class_='sell')
                customer_ctx = price_ctx.find('span', class_='customer')
                if sell_ctx is not None:
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sell_ctx.get_text().strip()))
                if customer_ctx is not None:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            customer_ctx.get_text().strip()))

            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            # Best effort: one broken page must not stop the crawl.
            # The message now names this method, not set_product_data.
            __LOG__.Error('에러 : set_product_data_second')
            __LOG__.Error(ex)

        return True
Exemple #5
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Extract one product from a listing entry and submit it.

        Reads the image, detail link, name (with an optional sold-out
        marker), brand and prices out of ``product_ctx`` into a new
        ``ProductData``. When a detail URL was found, the record is passed
        to ``set_product_data_sub`` and ``process_product_api``.

        Args:
            page_url: Listing page URL; not used by this method.
            soup: Parsed listing page; not used by this method.
            product_ctx: Element for a single product entry; must expose
                ``find`` / ``find_all`` (presumably a BeautifulSoup Tag).

        Returns:
            Always ``True``, including when an exception was caught and
            logged.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            ####################################
            # Product image, detail link, goods code and category.
            #
            # <dd class="prd-img"><img class="MS_prod_img_s" src="/shopimages/ecofoam/0450010000053.jpg?1527467204" alt="상품 섬네일" title="상품 섬네일"></dd>
            #
            # <dl class="item grid-item opa70" style="position: absolute; left: 0px; top: 0px;">
            # <a href="/shop/shopdetail.html?branduid=841206&amp;xcode=046&amp;...">
            ####################################
            for img_box_ctx in product_ctx.find_all('dd', class_='prd-img'):
                for img_ctx in img_box_ctx.find_all('img'):
                    # Keep only the path part of src (drop the query string).
                    raw_src = img_ctx.attrs.get('src', '')
                    img_src = raw_src.strip().split('?')[0].strip()
                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)
                        # The first usable image of this box is enough.
                        break

            # The detail link is the first <a> anywhere in the entry.
            product_link_ctx = product_ctx.find('a')
            if (product_link_ctx is not None
                    and 'href' in product_link_ctx.attrs):
                crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')
                if crw_post_url != '':
                    self.get_crw_goods_code(product_data, crw_post_url)
                    self.get_category_value(product_data, crw_post_url)

            ####################################
            # Product name and brand.
            #
            # <span class="prd-name ft_eb">도그자리 플랫<br></span>
            #
            # --- name when sold out ---
            # <span class="prd-name ft_eb">맘편한매트 소프트W<br>8세트(품절)</span>
            #
            # --- brand ---
            # <span class="prd-brand">도그자리</span>
            ####################################
            name_ctx = product_ctx.find('span', class_='prd-name ft_eb')
            if name_ctx is not None:
                crw_name = name_ctx.get_text().strip()
                # Substring test so that a marker at the very start of the
                # name is detected as well (find() > 0 skipped index 0).
                if '(품절)' in crw_name:
                    product_data.crw_is_soldout = 1
                    crw_name = crw_name.replace('(품절)', '').strip()

                product_data.crw_name = crw_name

            brand_ctx = product_ctx.find('span', class_='prd-brand')
            if brand_ctx is not None:
                product_data.crw_brand1 = brand_ctx.get_text().strip()

            ####################################
            # Prices.
            #
            # <span class="prd-price-discount"><del>75,000</del></span>
            #
            # <span class="prd-discount ft_eb">52,000&nbsp;원</span>
            ####################################
            for sub_ctx in product_ctx.find_all('div', class_='prd-sub'):
                sell_ctx = sub_ctx.find('span', class_='prd-discount ft_eb')
                consumer_ctx = sub_ctx.find('span',
                                            class_='prd-price-discount')

                if consumer_ctx is not None:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            consumer_ctx.get_text().strip()))

                if sell_ctx is not None:
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

            # Only entries with a resolvable detail URL are submitted.
            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            # Best effort: one broken entry must not stop the crawl.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #6
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Extract one product from a listing entry and submit it.

        Takes the categories from the page breadcrumb in ``soup`` and the
        brand, detail link, goods code, image, name, sold-out marker and
        prices from ``product_ctx``. When a detail URL was found, the
        record is passed to ``set_product_data_sub`` and
        ``process_product_api``.

        Args:
            page_url: Listing page URL; not used by this method.
            soup: Parsed listing page exposing ``select``.
            product_ctx: Element for a single product entry; must expose
                ``find_all`` (presumably a BeautifulSoup Tag).

        Returns:
            Always ``True``, including when an exception was caught and
            logged.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            # Categories come from a '>'-separated breadcrumb; the first
            # segment is skipped and the next three become levels 1 to 3.
            category_ctx_list = soup.select(
                'body > div.body_wrap > div.content_wrap > div.section_tit > div.close'
            )
            for category_ctx in category_ctx_list:
                segments = [
                    segment.strip()
                    for segment in category_ctx.get_text().strip().split('>')
                ]
                if len(segments) > 1:
                    product_data.crw_category1 = segments[1]
                if len(segments) > 2:
                    product_data.crw_category2 = segments[2]
                if len(segments) > 3:
                    product_data.crw_category3 = segments[3]

            ####################################
            # Brand.
            #
            # <div class="line_sub">
            # 한국산				</div>
            ####################################
            for brand_ctx in product_ctx.find_all('div', class_='line_sub'):
                product_data.crw_brand1 = brand_ctx.get_text().strip()

            ####################################
            # Product image / detail link / goods code.
            #
            # <div class="picture"><a href="./product.html?pd_code=A010489&amp;event_type=%C3%CA%C6%AF%B0%A1"><img src="http://queenpuppy.co.kr/shop/pd_img/A01/489/A010489_2.jpg"></a></div>
            ####################################
            for picture_ctx in product_ctx.find_all('div', class_='picture'):
                product_link_ctx = picture_ctx.find('a')
                if product_link_ctx is None:
                    continue

                if 'href' in product_link_ctx.attrs:
                    href = product_link_ctx.attrs['href'].strip()
                    # Relative links are prefixed with the shop base URL.
                    if href.startswith('http'):
                        tmp_product_link = href
                    else:
                        tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                                     href)

                    # Start from the full link so that an empty
                    # C_PRODUCT_STRIP_STR no longer leaves the URL unset
                    # (which made the goods-code parse below raise).
                    crw_post_url = tmp_product_link
                    if self.C_PRODUCT_STRIP_STR != '':
                        crw_post_url = tmp_product_link.replace(
                            self.C_PRODUCT_STRIP_STR, '')

                    # Drop the event_type parameter and everything after it.
                    crw_post_url = crw_post_url.split(
                        '&event_type=')[0].strip()

                    # Goods code: the pd_code value up to the next '&'.
                    goods_part = crw_post_url.split('?pd_code=')[1].strip()
                    product_data.crw_goods_code = goods_part.split('&')[0]

                # Prefer the lazy-load attribute over src; the last image
                # with a usable source wins.
                for img_ctx in product_link_ctx.find_all('img'):
                    img_src = ''
                    if 'data-original' in img_ctx.attrs:
                        img_src = img_ctx.attrs['data-original'].strip()
                    elif 'src' in img_ctx.attrs:
                        img_src = img_ctx.attrs['src'].strip()

                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)

            ####################################
            # Product name.
            #
            # <div class="name">
            # <div style="color:#fd705f; font-weight: bold; ..."></div>
            # <a href="./product.html?pd_code=A010489&amp;event_type=%C3%CA%C6%AF%B0%A1">
            # 건국유업 프로젝트 닥터케이 펫밀크 10개입										</a>
            # </div>
            ####################################
            for name_div_ctx in product_ctx.find_all('div', class_='name'):
                name_link_ctx = name_div_ctx.find('a')
                if name_link_ctx is None:
                    continue

                crw_name = name_link_ctx.get_text().strip()
                product_data.crw_name = crw_name
                # Substring test so that a marker at the very start of the
                # name is detected as well (find() > 0 skipped index 0).
                if '[품절]' in crw_name:
                    product_data.crw_is_soldout = 1
                    product_data.crw_name = crw_name.replace('[품절]',
                                                             '').strip()

            ####################################
            # Prices.
            #
            # <div class="line_np">20,000원</div>
            # <div class="line_sp">
            # 12,000원
            # <span style="font-size: 0.8em; color: #666; vertical-align:bottom;">40%↓</span>									</div>
            ####################################
            for price_ctx in product_ctx.find_all('div', class_='line_np'):
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(price_ctx.get_text().strip()))

            for price_ctx in product_ctx.find_all('div', class_='line_sp'):
                price_str = price_ctx.get_text().strip()
                rate_ctx = price_ctx.find('span')
                rate_str = ''
                if rate_ctx is not None:
                    rate_str = rate_ctx.get_text().strip()

                # Cut off the nested <span> text (the discount rate in the
                # sample) so its digits are not merged into the price.
                if rate_str != '':
                    price_str = price_str.split(rate_str)[0]

                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(price_str.strip()))

            # Only entries with a resolvable detail URL are submitted.
            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            # Best effort: one broken entry must not stop the crawl.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #7
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Extract one product from a listing entry and submit it.

        Reads the thumbnail image, detail link, name, bracketed brand
        prefix, prices and sold-out state out of ``product_ctx`` into a new
        ``ProductData``. When a detail URL was found, the record is passed
        to ``set_product_data_sub`` and ``process_product_api``.

        Args:
            page_url: Listing page URL; not used by this method.
            soup: Parsed listing page; not used by this method.
            product_ctx: Element for a single product entry; must expose
                ``find_all`` (presumably a BeautifulSoup Tag).

        Returns:
            Always ``True``, including when an exception was caught and
            logged.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            ####################################
            # Product image, detail link, goods code and category.
            #
            # <div class="thumb salebox"> <a href="/shop/shopdetail.html?branduid=3356611&amp;..."><img class="MS_prod_img_m" src="/shopimages/petnoriter/0020050000022.jpg?1590140914" alt="상품 섬네일"></a>
            # <input type="hidden" name="custom_price" value="49900">
            # <input type="hidden" name="product_price" value="28900">
            # <span class="sale_text" style="display: block;">42%</span> </div>
            ####################################
            thumb_list = product_ctx.find_all('div', class_='thumb salebox')
            for thumb_ctx in thumb_list:
                product_link_list = thumb_ctx.find_all('a')

                for img_ctx in thumb_ctx.find_all('img'):
                    # Keep only the path part of src (drop the query string).
                    raw_src = img_ctx.attrs.get('src', '')
                    img_src = raw_src.strip().split('?')[0].strip()
                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)
                        # The first usable image of this thumbnail is enough.
                        break

                # Use the first link that yields a non-empty detail URL.
                for product_link_ctx in product_link_list:
                    if 'href' not in product_link_ctx.attrs:
                        continue
                    crw_post_url = self.get_crw_post_url(
                        product_link_ctx, 'href')
                    if crw_post_url != '':
                        self.get_crw_goods_code(product_data, crw_post_url)
                        self.get_category_value(product_data, crw_post_url)
                        break

            ####################################
            # Product name and brand.
            #
            # <li class="dsc">논슬립 항균 배변 매트 원형/사각</li>
            ####################################
            for name_ctx in product_ctx.find_all('li', class_='dsc'):
                product_data.crw_name = name_ctx.get_text().strip()
                # The brand is written in brackets in front of the name,
                # e.g. "[스텔라&츄이] 츄이스 치킨 디너패티".
                if product_data.crw_name.startswith('['):
                    bracket_part = product_data.crw_name.split(']')[0]
                    product_data.crw_brand1 = bracket_part[1:].strip()

            ####################################
            # Price / sold-out state.
            #
            # <ul class="info">
            # <li class="dsc">논슬립 항균 배변 매트 원형/사각</li>
            # <li class="subname">배변걱정 이제그만~</li>
            # <li class="price">28,900원</li>
            # <li class="consumer">49,900원</li>
            # <li class="icon"><span class="MK-product-icons"></span></li>
            # <li class="closeup"><a class="btn-overlay-show" href="javascript:viewdetail('002005000002', '1', '');"><img src="/images/common/view_shopdetail2.gif" alt="미리보기"></a></li>
            # <li class="cboth icon_option"></li>
            # </ul>
            ####################################
            for list_ctx in product_ctx.find_all('ul'):
                sell_ctx = list_ctx.find('li', class_='price')
                consumer_ctx = list_ctx.find('li', class_='consumer')

                if list_ctx.find('li', class_='soldout') is not None:
                    product_data.crw_is_soldout = 1

                if consumer_ctx is not None:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            consumer_ctx.get_text().strip()))

                if sell_ctx is not None:
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

            # Only entries with a resolvable detail URL are submitted.
            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            # Best effort: one broken entry must not stop the crawl.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #8
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Extract one product from a listing tile and submit it.

        Builds a new ``ProductData`` from ``product_ctx`` (category, brand,
        thumbnail, sold-out flag, name, product URL, goods code, prices).
        When a product URL was found, the record is passed to
        ``set_product_data_sub`` and then ``process_product_api``.

        Args:
            page_url: Listing page URL; key into ``self.PAGE_URL_HASH`` when
                ``self.C_DETAIL_CATEGORY_VALUE`` is non-empty.
            soup: Parsed listing page; handed to ``set_godo_category_data``
                when no detail category value is configured.
            product_ctx: Parsed element of a single product tile
                (presumably a BeautifulSoup tag - it must offer
                ``find_all``).

        Returns:
            Always ``True``. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            ####################################
            # Category
            #
            # Either the '|'-separated path stored for this page (first
            # three levels only) or whatever set_godo_category_data reads
            # from the page.
            ####################################
            if self.C_DETAIL_CATEGORY_VALUE.strip() != '':
                category_path = self.PAGE_URL_HASH[page_url].split('|')
                category_attrs = ('crw_category1', 'crw_category2',
                                  'crw_category3')
                for attr_name, category in zip(category_attrs, category_path):
                    setattr(product_data, attr_name, category)
            else:
                self.set_godo_category_data(soup, product_data)

            ####################################
            # Brand
            #
            # <span class="item_brand"><strong>[BRAND]</strong></span>
            #
            # Square brackets are removed; the last non-empty match wins.
            ####################################
            for brand_ctx in product_ctx.find_all('span',
                                                  class_='item_brand'):
                brand_name = brand_ctx.get_text().strip()
                if brand_name != '':
                    product_data.crw_brand1 = brand_name.replace(
                        '[', '').replace(']', '').strip()

            ####################################
            # Product image
            #
            # <div class="item_photo_box">
            #   <a href="../goods/goods_view.php?goodsNo=...&amp;mtn=...">
            #     <img data-original="/data/goods/..jpg" src="/data/goods/..jpg">
            #   </a>
            # </div>
            #
            # 'data-original' is preferred over 'src'; the first image
            # found is kept.
            ####################################
            for photo_box in product_ctx.find_all('div',
                                                  class_='item_photo_box'):
                img_ctx = photo_box.find('img')
                if img_ctx is None:
                    continue

                img_src = ''
                if 'data-original' in img_ctx.attrs:
                    img_src = img_ctx.attrs['data-original'].strip()
                elif 'src' in img_ctx.attrs:
                    img_src = img_ctx.attrs['src'].strip()

                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    if product_data.product_img == '':
                        product_data.product_img = (
                            self.get_hangul_url_convert(img_link))

            ####################################
            # Sold-out flag (two markups)
            #
            # 1) <div class="item_icon_box">
            #      <img src="/data/icon/goods_icon/icon_soldout.gif">
            #    </div>
            # 2) <div class="item_photo_box">
            #      <strong class="item_soldout_bg">SOLD OUT</strong>
            #    </div>
            ####################################
            for icon_box in product_ctx.find_all('div',
                                                 class_='item_icon_box'):
                for icon_ctx in icon_box.find_all('img'):
                    # Substring test instead of `0 < find(...)`, which
                    # ignored a match at the very start of the value.
                    if ('src' in icon_ctx.attrs
                            and 'soldout' in icon_ctx.attrs['src']):
                        product_data.crw_is_soldout = 1

            if product_data.crw_is_soldout != 1:
                for photo_box in product_ctx.find_all(
                        'div', class_='item_photo_box'):
                    if photo_box.find_all('strong', class_='item_soldout_bg'):
                        product_data.crw_is_soldout = 1

            ####################################
            # Product link, name and goods code
            #
            # <div class="item_tit_box">
            #   <a href="../goods/goods_view.php?goodsNo=1000001614">
            #     <strong class="item_name">PRODUCT NAME</strong>
            #   </a>
            # </div>
            ####################################
            for title_box in product_ctx.find_all('div',
                                                  class_='item_tit_box'):
                for link_ctx in title_box.find_all('a'):
                    if 'href' not in link_ctx.attrs:
                        continue

                    for name_ctx in link_ctx.find_all('strong'):
                        product_data.crw_name = name_ctx.get_text().strip()

                    product_link = link_ctx.attrs['href'].strip()
                    if not product_link.startswith('http'):
                        product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                                 product_link)
                    if self.C_PRODUCT_STRIP_STR != '':
                        product_link = product_link.replace(
                            self.C_PRODUCT_STRIP_STR, '')
                    crw_post_url = product_link

                    # A link without '?goodsNo=' raises IndexError here and
                    # the whole tile is skipped by the handler below.
                    goods_no = crw_post_url.split('?goodsNo=')[1]
                    # Drop any trailing query parameters ('&mtn=...') so
                    # only the goods number itself is stored.
                    product_data.crw_goods_code = goods_no.split(
                        '&')[0].strip()

            ####################################
            # Prices
            #
            # <div class="item_money_box">
            #   <strong class="item_price"><span>23,000 KRW</span></strong>
            # </div>
            #
            # <del> holds the list price, strong.item_price the sale price.
            ####################################
            for money_box in product_ctx.find_all('div',
                                                  class_='item_money_box'):
                list_price_ctx = money_box.find('del')
                sale_price_ctx = money_box.find('strong', class_='item_price')
                if list_price_ctx is not None:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            list_price_ctx.get_text().strip()))
                if sale_price_ctx is not None:
                    # During a time sale extra text follows the price on
                    # later lines; only the first line is the price.
                    first_line = sale_price_ctx.get_text().strip().split(
                        '\n')[0]
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(first_line.strip()))

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            # Deliberate best effort: the failure is logged and the method
            # still returns True.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #9
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Extract one product from a listing tile and submit it.

        Reads category (from the page's navigation bar), brand, image,
        goods code, name, sold-out flag and prices into a new
        ``ProductData``. When the tile carries an ``onclick`` handler, the
        record is passed to ``set_product_data_sub`` and
        ``process_product_api``.

        Args:
            page_url: Listing page URL (not used by this implementation).
            soup: Parsed listing page; searched for ``div#navigation-bar``.
            product_ctx: Parsed element of a single product tile.

        Returns:
            Always ``True``. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            ####################################
            # Category: 2nd, 3rd and 4th <span> of the navigation bar
            ####################################
            nav_ctx = soup.find('div', {'id': 'navigation-bar'})
            if nav_ctx is not None:
                category_slot = {
                    2: 'crw_category1',
                    3: 'crw_category2',
                    4: 'crw_category3',
                }
                for position, crumb_ctx in enumerate(
                        nav_ctx.find_all('span'), start=1):
                    slot = category_slot.get(position)
                    if slot is not None:
                        setattr(product_data, slot,
                                crumb_ctx.get_text().strip())

            ####################################
            # Brand: the top-level category doubles as the brand
            ####################################
            product_data.crw_brand1 = product_data.crw_category1

            ####################################
            # Product image
            #
            # <img class="item-image" src="/_vir0001/product_img/P1449_..jpg">
            #
            # The last matching image with a non-empty source wins.
            ####################################
            for image_ctx in product_ctx.find_all('img', class_='item-image'):
                image_attrs = image_ctx.attrs
                if 'data-original' in image_attrs:
                    image_src = image_attrs['data-original'].strip()
                elif 'src' in image_attrs:
                    image_src = image_attrs['src'].strip()
                else:
                    image_src = ''

                if image_src == '':
                    continue
                image_link = self.set_img_url(self.BASIC_IMAGE_URL, image_src)
                product_data.product_img = self.get_hangul_url_convert(
                    image_link)

            ####################################
            # Product link and goods code
            #
            # <div class="item" onclick="goHref(1449,'P1449_..jpg')">
            #
            # The first argument of goHref(...) is the goods code.
            ####################################
            if 'onclick' in product_ctx.attrs:
                call_args = product_ctx.attrs['onclick'].strip().split(
                    'goHref(')[1]
                product_data.crw_goods_code = call_args.split(',')[0].strip()
                crw_post_url = 'http://www.montraum.com/common/process/shopview.asp?thisCategory=22&pack_content_id=' + product_data.crw_goods_code

            ####################################
            # Product name / sold-out flag
            #
            # <p class="item-description">NAME<br> (DETAIL)</p>
            #
            # When sold out, the same <p> also contains
            # <img src="/_vir0001/process/partImages/icon_soldout.gif">.
            ####################################
            for description_ctx in product_ctx.find_all(
                    'p', class_='item-description'):
                raw_name = description_ctx.get_text()
                product_data.crw_name = raw_name.replace('\n', ' ').strip()

                for icon_ctx in description_ctx.find_all('img'):
                    if ('src' in icon_ctx.attrs
                            and 'soldout' in icon_ctx.attrs['src']):
                        product_data.crw_is_soldout = 1

            ####################################
            # Prices
            #
            # <p class="item-price">
            #   <span class="list-price">74,000</span>
            #   <span class="now-price">40,900</span>
            # </p>
            ####################################
            price_slot = {
                'list-price': 'crw_price',
                'now-price': 'crw_price_sale',
            }
            for price_box in product_ctx.find_all('p', class_='item-price'):
                for amount_ctx in price_box.find_all('span'):
                    if 'class' not in amount_ctx.attrs:
                        continue
                    # Only the first class name decides which price it is.
                    slot = price_slot.get(amount_ctx.attrs['class'][0])
                    if slot is None:
                        continue
                    setattr(
                        product_data, slot,
                        int(
                            __UTIL__.get_only_digit(
                                amount_ctx.get_text().strip())))

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            # Failures are logged; the method still returns True.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #10
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Extract one product from a listing tile and submit it.

        Reads category (via ``get_category_value``), image, sold-out flag,
        name, product URL, goods code and prices into a new
        ``ProductData``. When a product URL was found, the record is passed
        to ``set_product_data_sub`` and ``process_product_api``.

        Args:
            page_url: Listing page URL, forwarded to ``get_category_value``.
            soup: Parsed listing page, forwarded to ``get_category_value``.
            product_ctx: Parsed element of a single product tile.

        Returns:
            Always ``True``. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            ####################################
            # Category
            ####################################
            self.get_category_value(product_data, page_url, soup)

            ####################################
            # Product image
            #
            # <img src="https://cdn.imweb.me/thumbnail/..jpg"
            #      class="_org_img org_img _lazy_img"
            #      data-original="https://cdn.imweb.me/thumbnail/..jpg"
            #      data-src="https://cdn.imweb.me/thumbnail/..jpg">
            #
            # Accepted class lists start with '_org_img org_img' and carry
            # at least one more class (e.g. '_lazy_img' or 'owl-lazy').
            ####################################
            for img_ctx in product_ctx.find_all('img'):
                if 'class' not in img_ctx.attrs:
                    continue
                class_names = img_ctx.attrs['class']
                if not 2 < len(class_names):
                    continue
                if class_names[0] != '_org_img' or class_names[1] != 'org_img':
                    continue

                # First non-empty source in order of preference.
                img_src = ''
                for attr_name in ('data-original', 'data-src', 'src'):
                    if attr_name in img_ctx.attrs:
                        img_src = img_ctx.attrs[attr_name].strip()
                    if img_src != '':
                        break

                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

            ####################################
            # Sold-out flag
            #
            # <div class="ns-icon clearfix">
            #   <div class="prod_icon sale">SALE</div>
            #   <div class="prod_icon sold_out">SOLDOUT</div>
            # </div>
            ####################################
            if product_ctx.find_all('div', class_='prod_icon sold_out'):
                product_data.crw_is_soldout = 1

            ####################################
            # Product link, name and goods code
            #
            # <div class="item-detail">
            #   <div class="item-pay">
            #     <h2><a class="_fade_link" href="/shop/?idx=1185">NAME</a></h2>
            #     <div class="item-pay-detail"> ... </div>
            #   </div>
            # </div>
            ####################################
            for detail_ctx in product_ctx.find_all('div',
                                                   class_='item-detail'):
                for h2_ctx in detail_ctx.find_all('h2'):
                    link_ctx = detail_ctx.find('a', class_='_fade_link')
                    if link_ctx is None or 'href' not in link_ctx.attrs:
                        continue

                    product_data.crw_name = h2_ctx.get_text().strip()

                    crw_post_url = self.get_crw_post_url(link_ctx, 'href')
                    if crw_post_url != '':
                        # Goods code is the 'idx' value without any
                        # following query parameters.
                        idx_value = crw_post_url.split('?idx=')[1].strip()
                        product_data.crw_goods_code = idx_value.split(
                            '&')[0].strip()

            ####################################
            # Prices
            #
            # <div class="item-pay-detail">
            #   <p class="sale_pay body_font_color_50">78,000 KRW</p>
            #   <p class="pay">58,500 KRW</p>
            # </div>
            #
            # 'sale_pay' is stored as the list price, 'pay' as the sale
            # price.
            ####################################
            for price_ctx in product_ctx.find_all('div',
                                                  class_='item-pay-detail'):
                # Search the price box itself. The previous code searched
                # the leftover loop variable of the name section, which is
                # the wrong element and undefined when no 'item-detail'
                # div exists.
                for p_ctx in price_ctx.find_all('p'):
                    if 'class' not in p_ctx.attrs:
                        continue
                    first_class = p_ctx.attrs['class'][0]
                    if first_class == 'sale_pay':
                        product_data.crw_price = int(
                            __UTIL__.get_only_digit(p_ctx.get_text().strip()))
                    elif first_class == 'pay':
                        product_data.crw_price_sale = int(
                            __UTIL__.get_only_digit(p_ctx.get_text().strip()))

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            # Deliberate best effort: the failure is logged and the method
            # still returns True.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #11
0
    def set_product_data_second(self, page_url, soup, product_ctx):
        """Extract one product from the alternative listing markup.

        Reads category (via ``set_godo_category_data_second``), image,
        name, product URL, goods code, price and sold-out flag into a new
        ``ProductData``. The record is passed to ``set_product_data_sub``
        and ``process_product_api`` only when its URL has no entry in
        ``self.PRODUCT_URL_HASH``.

        Args:
            page_url: Listing page URL (not used by this implementation).
            soup: Parsed listing page, forwarded to
                ``set_godo_category_data_second``.
            product_ctx: Parsed element of a single product tile.

        Returns:
            Always ``True``. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            ####################################
            # Category
            #
            # NOTE: inline extraction of the category (div.cg-main > h2)
            # and of the brand (span.item_brand) was disabled in the
            # original source; the category comes from the helper below.
            ####################################
            self.set_godo_category_data_second(soup, product_data)

            ####################################
            # Product image
            #
            # <div class="thumbnail">
            #   <a href="../goods/goods_view.php?goodsNo=1000000030">
            #     <img src="/data/goods/..jpg" class="middle">
            #   </a>
            # </div>
            #
            # 'data-original' is preferred over 'src'; the first image
            # found is kept.
            ####################################
            for thumb_ctx in product_ctx.find_all('div', class_='thumbnail'):
                img_ctx = thumb_ctx.find('img')
                if img_ctx is None:
                    continue

                img_src = ''
                if 'data-original' in img_ctx.attrs:
                    img_src = img_ctx.attrs['data-original'].strip()
                elif 'src' in img_ctx.attrs:
                    img_src = img_ctx.attrs['src'].strip()

                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    if product_data.product_img == '':
                        product_data.product_img = (
                            self.get_hangul_url_convert(img_link))

            ####################################
            # Product link, name and goods code
            #
            # <div class="txt">
            #   <a href="../goods/goods_view.php?goodsNo=1000000030">
            #     <strong>PRODUCT NAME</strong>
            #   </a>
            # </div>
            ####################################
            for text_box in product_ctx.find_all('div', class_='txt'):
                for link_ctx in text_box.find_all('a'):
                    if 'href' not in link_ctx.attrs:
                        continue

                    for name_ctx in link_ctx.find_all('strong'):
                        product_data.crw_name = name_ctx.get_text().strip()

                    product_link = link_ctx.attrs['href'].strip()
                    if not product_link.startswith('http'):
                        product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                                 product_link)
                    if self.C_PRODUCT_STRIP_STR != '':
                        product_link = product_link.replace(
                            self.C_PRODUCT_STRIP_STR, '')
                    crw_post_url = product_link

                    # A link without '?goodsNo=' raises IndexError here and
                    # the whole tile is skipped by the handler below.
                    goods_no = crw_post_url.split('?goodsNo=')[1]
                    product_data.crw_goods_code = goods_no.split(
                        '&')[0].strip()

            ####################################
            # Price / sold-out flag
            #
            # <div class="price gd-default">
            #   <span class="cost"><strong>180,000</strong> KRW</span>
            # </div>
            #
            # When sold out, span.cost contains the sold-out text (which
            # includes '품절') instead of a number.
            ####################################
            for div_ctx in product_ctx.find_all('div'):
                if 'class' not in div_ctx.attrs:
                    continue
                if div_ctx.attrs['class'][0] != 'price':
                    continue
                cost_ctx = div_ctx.find('span', class_='cost')
                if cost_ctx is None:
                    continue

                cost_value = cost_ctx.get_text().strip()

                # Substring test instead of `0 < find(...)`, which missed
                # text that begins with the sold-out marker.
                if '품절' in cost_value:
                    product_data.crw_is_soldout = 1

                # NOTE(review): if get_only_digit returns '' for text
                # without digits, int() raises and the tile is dropped by
                # the handler below - confirm the helper's behaviour.
                product_data.crw_price = int(
                    __UTIL__.get_only_digit(cost_value))

            if crw_post_url != '':
                # Only submit URLs that have no entry in PRODUCT_URL_HASH.
                if self.PRODUCT_URL_HASH.get(crw_post_url, -1) == -1:
                    self.set_product_data_sub(product_data, crw_post_url)
                    self.process_product_api(product_data)

        except Exception as ex:
            # Deliberate best effort: the failure is logged and the method
            # still returns True.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #12
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Extract one product from a listing tile and submit it.

        Reads category (from the page's category-depth list), product URL,
        goods code, image, name, sold-out flag and prices into a new
        ``ProductData``. When a product URL was found, the record is passed
        to ``set_product_data_sub`` and ``process_product_api``.

        Args:
            page_url: Listing page URL (not used by this implementation).
            soup: Parsed listing page; searched for the category list.
            product_ctx: Parsed element of a single product tile.

        Returns:
            Always ``True``. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            ####################################
            # Category
            #
            # Only <li class="item"> entries that contain a link are
            # counted; the 2nd, 3rd and 4th of them become categories 1-3.
            ####################################
            category_slot = {
                2: 'crw_category1',
                3: 'crw_category2',
                4: 'crw_category3',
            }
            for depth_ctx in soup.find_all('div',
                                           class_='category_depth clearbox'):
                position = 0
                for li_ctx in depth_ctx.find_all('li', class_='item'):
                    category_link = li_ctx.find('a')
                    if category_link is None:
                        continue
                    position += 1
                    slot = category_slot.get(position)
                    if slot is not None:
                        setattr(product_data, slot,
                                category_link.get_text().strip())

            # Brand extraction is not implemented for this markup.

            ####################################
            # Product link, goods code and image
            #
            # <span class="goodsDisplayImageWrap">
            #   <a href="/goods/view?no=792">
            #     <img src="/data/goods/201606/792_..jpg" onerror="...">
            #   </a>
            # </span>
            ####################################
            for wrap_ctx in product_ctx.find_all(
                    'span', class_='goodsDisplayImageWrap'):
                link_ctx = wrap_ctx.find('a')
                if link_ctx is None:
                    continue

                if 'href' in link_ctx.attrs:
                    product_link = link_ctx.attrs['href'].strip()
                    if not product_link.startswith('http'):
                        product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                                 product_link)
                    if self.C_PRODUCT_STRIP_STR != '':
                        product_link = product_link.replace(
                            self.C_PRODUCT_STRIP_STR, '')
                    crw_post_url = product_link

                    # A link without '?no=' raises IndexError here and the
                    # whole tile is skipped by the handler below.
                    product_data.crw_goods_code = crw_post_url.split(
                        '?no=')[1].strip()

                for img_ctx in link_ctx.find_all('img'):
                    # Only images carrying an 'onerror' fallback handler
                    # are treated as the product thumbnail.
                    if 'onerror' not in img_ctx.attrs:
                        continue

                    img_src = ''
                    if 'data-original' in img_ctx.attrs:
                        img_src = img_ctx.attrs['data-original'].strip()
                    elif 'src' in img_ctx.attrs:
                        img_src = img_ctx.attrs['src'].strip()

                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = (
                            self.get_hangul_url_convert(img_link))

            ####################################
            # Product name
            #
            # <a href="/goods/view?no=792"><span style="...">NAME</span></a>
            #
            # The last link containing a <span> wins.
            ####################################
            for link_ctx in product_ctx.find_all('a'):
                name_ctx = link_ctx.find('span')
                if name_ctx is not None:
                    product_data.crw_name = name_ctx.get_text().strip()

            ####################################
            # Prices / sold-out flag
            #
            # <span style="...;text-decoration:line-through;">21,000 KRW</span>
            # <span style="...;text-decoration:none;">15,000 KRW</span>
            #
            # When sold out:
            # <td><img src="/data/icon/goods_status/icon_list_soldout.gif"></td>
            ####################################
            for cell_ctx in product_ctx.find_all('td'):
                for icon_ctx in cell_ctx.find_all('img'):
                    if ('src' in icon_ctx.attrs
                            and 'soldout' in icon_ctx.attrs['src']):
                        product_data.crw_is_soldout = 1

                price_ctx = cell_ctx.find('span')
                if price_ctx is None or 'style' not in price_ctx.attrs:
                    continue

                price_text = price_ctx.get_text().strip()
                if price_text == '':
                    continue
                # A price starts with a digit and carries the currency
                # suffix '원' somewhere after it.
                if not (price_text[0].isdigit() and '원' in price_text):
                    continue

                # Substring tests instead of `0 < find(...)`, which missed
                # a style attribute that begins with the declaration.
                style_value = price_ctx.attrs['style']
                if 'text-decoration:line-through' in style_value:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(price_text))
                if 'text-decoration:none' in style_value:
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(price_text))

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            # Deliberate best effort: the failure is logged and the method
            # still returns True.
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #13
0
    def set_product_data(self, page_url, soup, category_path_str,
                         sub_category_str, product_ctx):
        """Build a ProductData from one product-list entry and submit it.

        Categories, image, sold-out flag, name, product URL, goods code and
        prices are read from ``product_ctx``. When a product URL was found
        the record is passed to ``set_product_data_sub`` and then to
        ``process_product_api``.

        Args:
            page_url: Not used by this method.
            soup: Not used by this method.
            category_path_str: Iterable of category names; its 2nd, 3rd and
                4th items become crw_category1, crw_category2 and
                crw_category3.
            sub_category_str: Extra category name, or '' for none.
            product_ctx: Parsed element of a single product (presumably a
                BeautifulSoup Tag -- confirm against the caller).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            # Path position -> category field. The first path item is
            # skipped on purpose by starting the table at 2.
            path_slots = {
                2: 'crw_category1',
                3: 'crw_category2',
                4: 'crw_category3',
            }
            idx = 0
            for idx, category_name in enumerate(category_path_str, 1):
                slot = path_slots.get(idx)
                if slot is not None:
                    setattr(product_data, slot, category_name)

            # The sub category lands one level below the last path item;
            # idx still holds the number of path items seen above.
            if (sub_category_str != ''):
                sub_slot = {2: 'crw_category2', 3: 'crw_category3'}.get(idx)
                if sub_slot is not None:
                    setattr(product_data, sub_slot, sub_category_str)

            # Brand extraction from <div class="brand"> is disabled for
            # this site.

            ####################################
            # Product image / sold-out flag
            #
            # <a href="/view/product/G0OS8PKL0KAHNCUJ/YSJRQFSCH" class="itemImg" title="...">
            # <img src="http://www.dhuman.co.kr/static-root/prdct/2020/05/13/bd1f6f76032f48329e927e9f7e727fe7.jpg" alt="..." class="" loading="lazy">
            # <span class="discountThumb">
            # <span class="discount"><strong>50</strong>%</span>
            # </span>
            # </a>
            ####################################
            for thumb_link_ctx in product_ctx.find_all('a', class_='itemImg'):
                for img_ctx in thumb_link_ctx.find_all('img'):
                    img_attrs = img_ctx.attrs
                    # A lazy-load attribute, when present, takes precedence
                    # over src even if it turns out to be blank.
                    if ('data-original' in img_attrs):
                        img_src = img_attrs['data-original'].strip()
                    elif ('src' in img_attrs):
                        img_src = img_attrs['src'].strip()
                    else:
                        img_src = ''

                    if (img_src == ''):
                        continue
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

                # Any span inside the thumbnail link containing the
                # sold-out word marks the product as sold out.
                for badge_ctx in thumb_link_ctx.find_all('span'):
                    if ('품절' in badge_ctx.get_text().strip()):
                        product_data.crw_is_soldout = 1

            ####################################
            # Product link, name and goods code
            #
            # <div class="itemTit">
            # <p class="name">
            # <a href="/view/product/G0OS8PKL0KAHNCUJ/YSJRQFSCH" title="...">
            # ...
            # </a>
            # </p>
            # <p class="cmnt">...</p>
            # </div>
            ####################################
            for title_ctx in product_ctx.find_all('div', class_='itemTit'):
                name_ctx = title_ctx.find('p', class_='name')
                if name_ctx is None:
                    continue
                anchor_ctx = name_ctx.find('a')
                if anchor_ctx is None:
                    continue
                if ('href' not in anchor_ctx.attrs):
                    continue

                product_data.crw_name = anchor_ctx.get_text().strip()

                # Everything from the first ';' on is dropped from the href.
                href_parts = anchor_ctx.attrs['href'].strip().split(';')
                product_link = href_parts[0].strip()
                if not product_link.startswith('http'):
                    product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                             href_parts[0].strip())
                crw_post_url = product_link

                if (self.C_PRODUCT_STRIP_STR != ''):
                    crw_post_url = product_link.replace(
                        self.C_PRODUCT_STRIP_STR, '')

                # The goods code is the 7th '/'-separated piece of the URL.
                url_pieces = crw_post_url.split('/')
                product_data.crw_goods_code = url_pieces[6].strip()

            ####################################
            # Prices
            #
            # <div class="priceWrap">
            # <span class="primecost"><strong>19,900</strong></span>
            # <span class="price"><strong>9,900</strong>원</span>
            # </div>
            ####################################
            price_slots = {
                'primecost': 'crw_price',
                'price': 'crw_price_sale',
            }
            for price_wrap_ctx in product_ctx.find_all('div',
                                                       class_='priceWrap'):
                for span_ctx in price_wrap_ctx.find_all('span'):
                    if ('class' not in span_ctx.attrs):
                        continue
                    # Only the first class name decides the price kind.
                    first_class = span_ctx.attrs['class'][0]
                    price_slot = price_slots.get(first_class)
                    if price_slot is not None:
                        setattr(
                            product_data, price_slot,
                            int(
                                __UTIL__.get_only_digit(
                                    span_ctx.get_text().strip())))

            # Without a product URL there is nothing to submit.
            if (crw_post_url != ''):
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #14
0
    def set_product_data(self, category_key, soup, product_ctx):
        """Build a ProductData from one product-list entry and submit it.

        Reads category, image, sold-out flag, goods code, product URL, name
        and prices from ``product_ctx``. When a product URL was found the
        record is passed to ``set_product_data_sub`` and then to
        ``process_product_api``.

        Args:
            category_key: Forwarded to ``get_category_value``.
            soup: Forwarded to ``get_category_value``.
            product_ctx: Parsed element of a single product (presumably a
                BeautifulSoup Tag -- confirm against the caller).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            # Category fields are filled by the helper.
            self.get_category_value(product_data, category_key, soup)

            ####################################
            # Product image: taken from the inline background-image style
            #
            # <div class="thumb img" imgsrc="/uploadedFiles/46606/product/image_1573609552547.jpeg" style="width:100%;background-image:url(https://contents.sixshop.com/thumbnails/uploadedFiles/46606/product/image_1573609552547_1000.jpeg)"></div>
            ####################################
            for thumb_ctx in product_ctx.find_all('div', class_='thumb img'):
                if ('style' not in thumb_ctx.attrs):
                    continue
                style_text = thumb_ctx.attrs['style'].strip()
                # Text after the first ':url(' with every ')' removed.
                img_src = style_text.split(':url(')[1].replace(')', '')
                if (img_src == ''):
                    continue
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                product_data.product_img = self.get_hangul_url_convert(
                    img_link)

            ####################################
            # Sold-out flag
            #
            # <div class="soldOutBadge badge"><span>Sold Out</span></div>
            ####################################
            if product_ctx.find_all('div', class_='soldOutBadge badge'):
                product_data.crw_is_soldout = 1

            # The price area may carry a "sold out" text instead of a badge.
            for price_box_ctx in product_ctx.find_all(
                    'div', class_='shopProduct price'):
                lowered = price_box_ctx.get_text().strip().lower()
                has_sold = lowered.find('sold') >= 0
                # 'out' must appear somewhere after the first character.
                has_out = lowered.find('out') > 0
                if has_sold and has_out:
                    product_data.crw_is_soldout = 1

            ####################################
            # Goods code, product link and name
            #
            # <div class="shopProductWrapper badgeUse" data-productno="1008345"><a href="/product/Chu">...<div class="shopProduct productName">...</div><div class="shopProduct price"><span class="productPriceSpan">20,000원</span></div>...</a></div>
            ####################################
            wrapper_attrs = product_ctx.attrs
            if ('data-productno' in wrapper_attrs):
                product_data.crw_goods_code = wrapper_attrs['data-productno']

            anchor_ctx = product_ctx.find('a')
            if anchor_ctx is not None and ('href' in anchor_ctx.attrs):
                product_link = anchor_ctx.attrs['href'].strip()
                if not product_link.startswith('http'):
                    product_link = '%s%s' % (
                        self.BASIC_PRODUCT_URL,
                        anchor_ctx.attrs['href'].strip())
                crw_post_url = product_link

                if (self.C_PRODUCT_STRIP_STR != ''):
                    crw_post_url = product_link.replace(
                        self.C_PRODUCT_STRIP_STR, '')

            # The last matching name element wins.
            for name_ctx in product_ctx.find_all(
                    'div', class_='shopProduct productName'):
                product_data.crw_name = name_ctx.get_text().strip()

            ####################################
            # Prices
            #
            # <span class="productPriceSpan">20,000원</span>
            #
            # <div class="shopProduct price"><span class="productDiscountPriceSpan">16,200원 </span><span class="productPriceWithDiscountSpan">18,000원</span></div>
            ####################################
            price_slots = {
                'productPriceSpan': 'crw_price',
                'productDiscountPriceSpan': 'crw_price_sale',
                'productPriceWithDiscountSpan': 'crw_price',
            }
            for price_box_ctx in product_ctx.find_all(
                    'div', class_='shopProduct price'):
                for span_ctx in price_box_ctx.find_all('span'):
                    if ('class' not in span_ctx.attrs):
                        continue
                    # Only the first class name decides the price kind.
                    first_class = span_ctx.attrs['class'][0]
                    price_slot = price_slots.get(first_class)
                    if price_slot is not None:
                        setattr(
                            product_data, price_slot,
                            int(
                                __UTIL__.get_only_digit(
                                    span_ctx.get_text().strip())))

            # Without a product URL there is nothing to submit.
            if (crw_post_url != ''):
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #15
0
    def set_product_data_second(self, page_url, product_json):
        """Build a ProductData from one product JSON record and register it.

        Categories come from the instance's CRW_CATEGORY_1..3. Image, name,
        goods code, product URL, prices and sold-out flag are read from
        ``product_json``. When a product URL is present the record is handed
        to ``set_product_url_hash``.

        The image is taken from the first non-empty value among
        ``image_medium``, ``image_big`` and ``image_small``; the name from
        the first non-empty value among ``product_name_striptag``,
        ``product_name_tag`` and ``product_name``. (The previous code tested
        ``!= ''`` before applying the alternatives, so they overwrote an
        already chosen value and never worked as a fallback.)

        Args:
            page_url: Not used by this method.
            product_json: Mapping of product field names to values.

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            # Product categories are fixed per crawler instance.
            product_data.crw_category1 = self.CRW_CATEGORY_1
            product_data.crw_category2 = self.CRW_CATEGORY_2
            product_data.crw_category3 = self.CRW_CATEGORY_3

            def first_filled(*keys):
                # First value among keys that is present and neither None
                # nor ''; None when no key qualifies.
                for key in keys:
                    if key in product_json:
                        value = product_json[key]
                        if value is not None and value != '':
                            return value
                return None

            # Image: preferred size first, other sizes only as fallback.
            img_src = first_filled('image_medium', 'image_big', 'image_small')
            if img_src is not None:
                img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                product_data.product_img = self.get_hangul_url_convert(
                    img_link)

            # Product name: tag-stripped variant first, others as fallback.
            name = first_filled('product_name_striptag', 'product_name_tag',
                                'product_name')
            if name is not None:
                product_data.crw_name = name

            # Product number, stored as a string.
            if 'product_no' in product_json:
                product_data.crw_goods_code = str(product_json['product_no'])

            # Product URL: relative links get the site prefix.
            if 'link_product_detail' in product_json:
                tmp_product_link = product_json['link_product_detail']
                if not tmp_product_link.startswith('http'):
                    tmp_product_link = '%s%s' % (
                        self.BASIC_PRODUCT_URL,
                        product_json['link_product_detail'])
                crw_post_url = tmp_product_link

                if (self.C_PRODUCT_STRIP_STR != ''):
                    crw_post_url = tmp_product_link.replace(
                        self.C_PRODUCT_STRIP_STR, '')

            # Prices.
            # NOTE(review): int() raises on '' or None, which drops the whole
            # record via the except below -- confirm these fields are always
            # numeric when present.
            if 'product_custom' in product_json:
                product_data.crw_price = int(product_json['product_custom'])
            if 'product_price' in product_json:
                product_data.crw_price_sale = int(
                    product_json['product_price'])

            # Any non-blank sold-out icon marks the product as sold out.
            if 'soldout_icon' in product_json:
                if (product_json['soldout_icon'].strip() != ''):
                    product_data.crw_is_soldout = 1

            # Without a product URL there is nothing to register.
            if (crw_post_url != ''):
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data_second')
            __LOG__.Error(ex)

        return True
Exemple #16
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData from one product-list entry and submit it.

        Reads image, product URL, name, brand, prices and sold-out flag from
        ``product_ctx``; goods code and category are filled by
        ``get_crw_goods_code`` / ``get_category_value`` from the product URL.
        When a product URL was found the record is passed to
        ``set_product_data_sub`` and then to ``process_product_api``.

        Args:
            page_url: Not used by this method.
            soup: Not used by this method.
            product_ctx: Parsed element of a single product (presumably a
                BeautifulSoup Tag -- confirm against the caller).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            ####################################
            # Product image, product link / goods code, category
            #
            # <div class="thumb">
            # <div class="over_view  -mos01">
            # <ul>
            # <li><a href="javascript:viewdetail('011000000054', '1', '');" aria-label="..." class="hint--top"><i class="xi-search" alt="..."></i></a></li>
            # <li><a href="/shop/shopdetail.html?branduid=2243605&amp;xcode=009&amp;mcode=000&amp;scode=&amp;type=P&amp;sort=regdate&amp;cur_code=009&amp;GfDT=bm1%2BW1w%3D" aria-label="..." class="hint--top"><i class="xi-bars" alt="..."></i></a></li>
            # </ul>
            # </div>
            # <a href="/shop/shopdetail.html?branduid=2243605&amp;..."><img class="MS_prod_img_s" src="/shopimages/coates1024/0110000000543.gif?1581155993"></a>
            # </div>
            ####################################
            for thumb_ctx in product_ctx.find_all('div', class_='thumb'):
                anchors = thumb_ctx.find_all('a')
                images = thumb_ctx.find_all('img')

                # Use the first image that has a non-blank src; the query
                # string after '?' is dropped.
                for img_ctx in images:
                    if ('src' in img_ctx.attrs):
                        raw_src = img_ctx.attrs['src'].strip()
                        img_src = raw_src.split('?')[0].strip()
                    else:
                        img_src = ''

                    if (img_src == ''):
                        continue
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)
                    break

                # Stop at the first anchor for which the helper yields a
                # non-empty URL (presumably the javascript: links yield ''
                # -- confirm in get_crw_post_url).
                for anchor_ctx in anchors:
                    if ('href' not in anchor_ctx.attrs):
                        continue
                    crw_post_url = self.get_crw_post_url(anchor_ctx, 'href')
                    if (crw_post_url == ''):
                        continue
                    self.get_crw_goods_code(product_data, crw_post_url)
                    self.get_category_value(product_data, crw_post_url)
                    break

            ####################################
            # Product name and brand
            # <li class="name">...</li>
            ####################################
            for name_ctx in product_ctx.find_all('li', class_='name'):
                name_text = name_ctx.get_text().strip()
                product_data.crw_name = name_text
                # A name that starts with "[brand]" carries the brand as a
                # prefix; take the text between '[' and the first ']'.
                if name_text.startswith('['):
                    bracket_head = name_text.split(']')[0]
                    product_data.crw_brand1 = bracket_head[1:].strip()

            ####################################
            # Prices / sold-out flag
            #
            # <li>
            # <div class="over_sale -mos">30%</div>
            # <span class="price01">39,500원</span>
            # <span class="price02">27,600원</span>
            # </li>
            ####################################
            for list_ctx in product_ctx.find_all('ul'):
                sale_ctx = list_ctx.find('span', class_='price02')
                list_price_ctx = list_ctx.find('span', class_='price01')
                soldout_ctx = list_ctx.find('li', class_='soldout')

                if soldout_ctx is not None:
                    product_data.crw_is_soldout = 1

                if list_price_ctx is not None:
                    list_price_text = list_price_ctx.get_text().strip()
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(list_price_text))

                if sale_ctx is not None:
                    sale_text = sale_ctx.get_text().strip()
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sale_text))

            # Without a product URL there is nothing to submit.
            if (crw_post_url != ''):
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #17
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData from one product-list entry and submit it.

        Reads image, product URL, name, brand, prices and sold-out flag from
        ``product_ctx``; goods code and category are filled by
        ``get_crw_goods_code`` / ``get_category_value`` from the product URL.
        When a product URL was found the record is passed to
        ``set_product_data_sub`` and then to ``process_product_api``.

        Args:
            page_url: Not used by this method.
            soup: Not used by this method.
            product_ctx: Parsed element of a single product (presumably a
                BeautifulSoup Tag -- confirm against the caller).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            ####################################
            # Product image, product link / goods code, category
            #
            # <div class="thumb salebox">
            # <a href="/shop/shopdetail.html?branduid=3534594&amp;xcode=003&amp;mcode=001&amp;scode=&amp;type=X&amp;sort=manual&amp;cur_code=003&amp;GfDT=aWt3UQ%3D%3D"><img class="MS_prod_img_m" src="/shopimages/cocochien/0030010000152.jpg?1581790516" alt="..."></a>
            # <input type="hidden" name="custom_price" value="0">
            # <input type="hidden" name="product_price" value="34500">
            # <div class="info_icon">
            # <span class="m_quickview"><a class="btn-overlay-show" href="javascript:viewdetail('003001000015', '1', '');"><img src="/design/cocochien/0746amelie/info_icon02.gif"></a></span>
            # </div>
            # </div>
            ####################################
            for thumb_ctx in product_ctx.find_all('div',
                                                  class_='thumb salebox'):
                anchors = thumb_ctx.find_all('a')
                images = thumb_ctx.find_all('img')

                # Use the first image that has a non-blank src; the query
                # string after '?' is dropped.
                for img_ctx in images:
                    if ('src' in img_ctx.attrs):
                        raw_src = img_ctx.attrs['src'].strip()
                        img_src = raw_src.split('?')[0].strip()
                    else:
                        img_src = ''

                    if (img_src == ''):
                        continue
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)
                    break

                # Stop at the first anchor for which the helper yields a
                # non-empty URL.
                for anchor_ctx in anchors:
                    if ('href' not in anchor_ctx.attrs):
                        continue
                    crw_post_url = self.get_crw_post_url(anchor_ctx, 'href')
                    if (crw_post_url == ''):
                        continue
                    self.get_crw_goods_code(product_data, crw_post_url)
                    self.get_category_value(product_data, crw_post_url)
                    break

            ####################################
            # Product name and brand
            # <li class="dsc">...</li>
            ####################################
            for name_ctx in product_ctx.find_all('li', class_='dsc'):
                name_text = name_ctx.get_text().strip()
                product_data.crw_name = name_text
                # A name that starts with "[brand]" carries the brand as a
                # prefix; take the text between '[' and the first ']'.
                if name_text.startswith('['):
                    bracket_head = name_text.split(']')[0]
                    product_data.crw_brand1 = bracket_head[1:].strip()

            ####################################
            # Prices / sold-out flag
            #
            # <ul class="info">
            # <li class="dsc">...</li>
            # <li class="subname"></li>
            # <li class="consumer">26,000원</li> <li class="price">13,000원</li>
            # <li class="icon"><span class="MK-product-icons"></span></li>
            # </ul>
            #
            # ---------- when sold out ----------
            # <ul class="info">
            # <li class="dsc">...</li>
            # <li class="subname"></li>
            # <li class="soldout">SOLD OUT</li>
            # <li class="icon"><span class="MK-product-icons"></span></li>
            # </ul>
            ####################################
            for list_ctx in product_ctx.find_all('ul'):
                sale_ctx = list_ctx.find('li', class_='price')
                list_price_ctx = list_ctx.find('li', class_='consumer')
                soldout_ctx = list_ctx.find('li', class_='soldout')

                if soldout_ctx is not None:
                    product_data.crw_is_soldout = 1

                if list_price_ctx is not None:
                    list_price_text = list_price_ctx.get_text().strip()
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(list_price_text))

                if sale_ctx is not None:
                    sale_text = sale_ctx.get_text().strip()
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sale_text))

            # Without a product URL there is nothing to submit.
            if (crw_post_url != ''):
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #18
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a ProductData from one product-list entry and submit it.

        Reads image, product URL, name, brand, prices and sold-out flag from
        ``product_ctx``; goods code and category are filled by
        ``get_crw_goods_code`` / ``get_category_value`` from the product URL.
        When a product URL was found the record is passed to
        ``set_product_data_sub`` and then to ``process_product_api``.

        Args:
            page_url: Not used by this method.
            soup: Not used by this method.
            product_ctx: Parsed element of a single product (presumably a
                BeautifulSoup Tag -- confirm against the caller).

        Returns:
            Always True. Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            ####################################
            # Product image, product link / goods code, category
            #
            # <div class="thumb">
            # <a href="/shop/shopdetail.html?branduid=1000006164&amp;xcode=007&amp;mcode=006&amp;scode=001&amp;type=X&amp;sort=order&amp;cur_code=007&amp;GfDT=aWx3UQ%3D%3D"><img class="MS_prod_img_m" src="/shopimages/sizeoo/0070060000702.jpg?1589180862" onmouseover="this.src='/shopimages/sizeoo/007006000070.jpg?1589180862'" onmouseout="this.src='/shopimages/sizeoo/0070060000702.jpg?1589180862'" alt="" title=""></a>
            # </div>
            ####################################
            img_div_list = product_ctx.find_all('div', class_='thumb')
            for img_div_ctx in img_div_list:
                product_link_list = img_div_ctx.find_all('a')
                img_list = img_div_ctx.find_all('img')

                # Use the first image that has a non-blank src; the query
                # string after '?' is dropped.
                for img_ctx in img_list:
                    img_src = ''
                    if ('src' in img_ctx.attrs):
                        split_list = img_ctx.attrs['src'].strip().split('?')
                        img_src = split_list[0].strip()

                    if (img_src != ''):
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)
                        break

                # Stop at the first anchor for which the helper yields a
                # non-empty URL.
                for product_link_ctx in product_link_list:
                    if ('href' in product_link_ctx.attrs):
                        crw_post_url = self.get_crw_post_url(
                            product_link_ctx, 'href')
                        if (crw_post_url != ''):
                            self.get_crw_goods_code(product_data, crw_post_url)
                            self.get_category_value(product_data, crw_post_url)
                            break

            ####################################
            # Product name and brand
            # <li class="name"><span class="MK-product-icons"></span> ...</li>
            ####################################
            name_strong_list = product_ctx.find_all('li', class_='name')
            for name_strong_ctx in name_strong_list:
                product_data.crw_name = name_strong_ctx.get_text().strip()
                # A name that starts with "[brand]" carries the brand as a
                # prefix; take the text between '[' and the first ']'.
                if product_data.crw_name.startswith('['):
                    brand_list = product_data.crw_name.split(']')
                    product_data.crw_brand1 = brand_list[0][1:].strip()

            ####################################
            # Prices / sold-out flag
            #
            # <li class="price">
            # <span><s>32,000</s>원</span>
            # 32,000원
            # </li>
            #
            # ------------ when sold out ------------
            # <li class="price">
            # Sold Out
            # </li>
            ####################################
            div_list = product_ctx.find_all('li', class_='price')
            for div_ctx in div_list:
                # Full text of the <li>: struck-through price (if any)
                # followed by the selling price.
                sell_price = div_ctx.get_text().strip()
                consumer_ctx = div_ctx.find('span')

                consumer_price = ''
                if consumer_ctx is not None:
                    consumer_price = consumer_ctx.get_text().strip()
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(consumer_price))

                # Remove only the FIRST occurrence of the struck-through
                # price. Removing every occurrence would also wipe the
                # selling price whenever both prices are equal (as in the
                # sample markup above) or one text contains the other.
                crw_price_sale = sell_price.replace(consumer_price, '',
                                                    1).strip()
                # NOTE(review): for a sold-out item the text is "Sold Out";
                # if get_only_digit returns '' for it, int() raises here and
                # the record is dropped before the flag below is set --
                # confirm the helper's behavior.
                product_data.crw_price_sale = int(
                    __UTIL__.get_only_digit(crw_price_sale))
                # Sold-out items show the text "Sold Out" instead of a price.
                if (0 < crw_price_sale.strip().find('Out')):
                    product_data.crw_is_soldout = 1

            # Without a product URL there is nothing to submit.
            if (crw_post_url != ''):
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #19
0
	def set_product_data(self, page_url, soup, product_ctx):
		"""Build a ProductData from one product-list entry and register it.

		Categories come from ``self.PAGE_URL_HASH[page_url]`` ('|'-separated);
		image, name, product URL, goods code, brand and price are read from
		``product_ctx``. The sold-out flag is delegated to
		``set_product_soldout_first``. When a product URL was found the
		record is handed to ``set_product_url_hash``.

		Args:
			page_url: Key into ``self.PAGE_URL_HASH``.
			soup: Not used by this method.
			product_ctx: Parsed element of a single product (presumably a
				BeautifulSoup Tag -- confirm against the caller).

		Returns:
			Always True. Any exception is logged and swallowed.
		"""
		try:
			product_data = ProductData()
			crw_post_url = ''

			# Product categories: the first three '|'-separated parts.
			category_parts = self.PAGE_URL_HASH[page_url].split('|')
			category_fields = ('crw_category1', 'crw_category2', 'crw_category3')
			for field_name, part in zip(category_fields, category_parts):
				setattr(product_data, field_name, part.strip())

			# Product image: the first non-blank img src found is kept.
			# NOTE(review): the anchors are searched in product_ctx, not in
			# the thumbnail div itself -- confirm this is intended.
			for _ in product_ctx.find_all('div', class_='thumbnail'):
				for anchor_ctx in product_ctx.find_all('a'):
					for img_ctx in anchor_ctx.find_all('img'):
						if 'src' not in img_ctx.attrs:
							continue
						img_src = img_ctx.attrs['src'].strip()
						if img_src == '':
							continue
						img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
						if product_data.product_img == '':
							product_data.product_img = self.get_hangul_url_convert(img_link)

			# Sold-out flag
			self.set_product_soldout_first(product_data, product_ctx)

			# Product name / product URL / goods code
			for name_ctx in product_ctx.find_all('strong', class_='name'):
				for anchor_ctx in name_ctx.find_all('a'):
					if 'href' not in anchor_ctx.attrs:
						continue

					product_data.crw_name = anchor_ctx.get_text().strip()

					product_link = anchor_ctx.attrs['href'].strip()
					if not product_link.startswith('http'):
						product_link = '%s%s' % (self.BASIC_PRODUCT_URL, anchor_ctx.attrs['href'].strip())
					crw_post_url = product_link

					if self.C_PRODUCT_STRIP_STR != '':
						crw_post_url = product_link.replace(self.C_PRODUCT_STRIP_STR, '')

					# The 5th URL piece is the name fallback (used only when
					# the link text is empty); the 6th is the goods code.
					url_pieces = crw_post_url.split('/')
					if product_data.crw_name == '':
						product_data.crw_name = url_pieces[4].strip()
					product_data.crw_goods_code = url_pieces[5].strip()

			# Price / brand
			for desc_ctx in product_ctx.find_all('div', class_='description'):
				# Brand: a span whose only class is 'summary'.
				for span_ctx in desc_ctx.find_all('span'):
					if 'class' not in span_ctx.attrs:
						continue
					span_classes = span_ctx.attrs['class']
					if len(span_classes) == 1 and span_classes[0].strip() == 'summary':
						product_data.crw_brand1 = span_ctx.get_text().strip()

				# Price: an li whose only class is 'price'.
				for li_ctx in desc_ctx.find_all('li'):
					if 'class' not in li_ctx.attrs:
						continue
					li_classes = li_ctx.attrs['class']
					if len(li_classes) == 1 and li_classes[0].strip() == 'price':
						product_data.crw_price = int(__UTIL__.get_only_digit(li_ctx.get_text().strip()))

			# Without a product URL there is nothing to register.
			if crw_post_url != '':
				self.set_product_url_hash(product_data, crw_post_url)

		except Exception as ex:
			__LOG__.Error('에러 : set_product_data')
			__LOG__.Error(ex)

		return True
Exemple #20
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Parse one product element of a listing page and submit it.

        Fills a new ``ProductData`` with category, sold-out flag, image,
        goods code, brand, name and prices.  When a product link was found,
        the record is handed to ``set_product_data_sub`` and
        ``process_product_api``.

        Args:
            page_url: Not used by this method.
            soup: Parsed page; its ``#menu_inner`` element supplies the
                category breadcrumb.
            product_ctx: Element wrapping a single product; queried with
                ``find`` / ``find_all``.

        Returns:
            Always ``True``.  Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            # Category breadcrumb: text split on '>'.  The first segment is
            # skipped; segments 2-4 become category levels 1-3.
            category_fields = ('crw_category1', 'crw_category2', 'crw_category3')
            for breadcrumb_ctx in soup.select('#menu_inner'):
                crumbs = breadcrumb_ctx.get_text().strip().split('>')
                for field_name, crumb in zip(category_fields, crumbs[1:]):
                    setattr(product_data, field_name, crumb.strip())

            # Sold-out flag: presence of <span class="soldOut">.
            if product_ctx.find('span', class_='soldOut') is not None:
                product_data.crw_is_soldout = 1

            # Product image.
            # <div class="thumbnail"><div class="centered">
            #   <a href="prd_detail.php?idx=171&amp;part_idx=90"><img src="/data/goodsImages/..."></a>
            # </div></div>
            for thumb_ctx in product_ctx.find_all('div', class_='thumbnail'):
                thumb_link_ctx = thumb_ctx.find('a')
                if thumb_link_ctx is None:
                    continue
                for img_ctx in thumb_link_ctx.find_all('img'):
                    # "data-original" takes precedence over "src" when present.
                    if 'data-original' in img_ctx.attrs:
                        img_src = img_ctx.attrs['data-original'].strip()
                    elif 'src' in img_ctx.attrs:
                        img_src = img_ctx.attrs['src'].strip()
                    else:
                        img_src = ''

                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)

            # Product name / product link / goods code.
            # <div class="title"><a href="prd_detail.php?idx=171&amp;part_idx=90">[brand] name</a></div>
            for title_ctx in product_ctx.find_all('div', class_='title'):
                title_link_ctx = title_ctx.find('a')
                if title_link_ctx is None:
                    continue

                if 'href' in title_link_ctx.attrs:
                    # Hrefs that do not start with "http" get the product base URL prefixed.
                    href = title_link_ctx.attrs['href'].strip()
                    if not href.startswith('http'):
                        href = '%s%s' % (self.BASIC_PRODUCT_URL, href)

                    crw_post_url = href
                    if self.C_PRODUCT_STRIP_STR != '':
                        crw_post_url = href.replace(self.C_PRODUCT_STRIP_STR,
                                                    '')

                    # Goods code is the value of the "idx" query parameter; a
                    # URL without "?idx=" raises IndexError, handled below.
                    after_idx = crw_post_url.split('?idx=')[1].strip()
                    product_data.crw_goods_code = after_idx.split('&')[0]

                # Title format is "[brand] name".  Split on the FIRST ']' only,
                # so a name that itself contains ']' still yields brand + name
                # (splitting on every ']' used to drop the brand and leave
                # "[brand" as the name in that case).
                title_parts = title_link_ctx.get_text().strip().split(']', 1)
                crw_name = title_parts[0].strip()
                if len(title_parts) == 2:
                    product_data.crw_brand1 = title_parts[0].replace(
                        '[', '').strip()
                    crw_name = title_parts[1].strip()
                product_data.crw_name = crw_name

            # Prices.
            # <div class="priceWrap">
            #   <div class="saleprice"><span>119,000</span>won</div>
            #   <div class="price"><strong>101,150</strong>won</div>
            # </div>
            # Note the mapping: div.saleprice feeds crw_price and div.price
            # feeds crw_price_sale.
            for price_wrap_ctx in product_ctx.find_all('div',
                                                       class_='priceWrap'):
                saleprice_ctx = price_wrap_ctx.find('div', class_='saleprice')
                price_ctx = price_wrap_ctx.find('div', class_='price')
                if saleprice_ctx is not None:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            saleprice_ctx.get_text().strip()))
                if price_ctx is not None:
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(price_ctx.get_text().strip()))

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #21
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Parse one product element of a listing page and submit it.

        Fills a new ``ProductData`` with category, image, sold-out flag,
        name, goods code and prices read from ``product_ctx``.  When a
        product link was found, the record is handed to
        ``set_product_data_sub`` and ``process_product_api``.

        Args:
            page_url: Key into ``self.PAGE_URL_HASH``; its value is read as a
                ``|``-separated category path.
            soup: Not used by the active code in this method.
            product_ctx: Element for a single product; queried through
                ``find_all`` and ``attrs``.

        Returns:
            Always ``True``.  Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            # Category: taken from the page's stored path, but only when a
            # detail-category value is configured.
            if self.C_DETAIL_CATEGORY_VALUE.strip() != '':
                category_fields = ('crw_category1', 'crw_category2',
                                   'crw_category3')
                category_path = self.PAGE_URL_HASH[page_url].split('|')
                for field_name, level in zip(category_fields, category_path):
                    setattr(product_data, field_name, level)

            # Disabled alternatives kept for reference: category from
            # div.sub_title_txt > h2, and brand from span.item_brand.

            # Product image: first <img> of each <div class="prdimg">.
            for img_box_ctx in product_ctx.find_all('div', class_='prdimg'):
                img_ctx = img_box_ctx.find('img')
                if img_ctx is None:
                    continue

                img_src = ''
                if 'src' in img_ctx.attrs:
                    img_src = img_ctx.attrs['src'].strip()

                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

            # Sold-out flag: set when the product element's first CSS class
            # is "soldout".
            if 'class' in product_ctx.attrs:
                product_classes = product_ctx.attrs['class']
                if product_classes[0] == 'soldout':
                    product_data.crw_is_soldout = 1

            # Product link / name / goods code.
            # <p class="name"><a href="https://.../shop/detail.php?pno=BEED...&amp;rURL=...">DINING SET 1P OLIVE</a></p>
            for name_ctx in product_ctx.find_all('p', class_='name'):
                link_ctx = name_ctx.find('a')
                if link_ctx is None or 'href' not in link_ctx.attrs:
                    continue

                product_data.crw_name = link_ctx.get_text().strip()

                # Hrefs that do not start with "http" get the product base URL prefixed.
                href = link_ctx.attrs['href'].strip()
                if not href.startswith('http'):
                    href = '%s%s' % (self.BASIC_PRODUCT_URL, href)

                crw_post_url = href
                if self.C_PRODUCT_STRIP_STR != '':
                    crw_post_url = href.replace(self.C_PRODUCT_STRIP_STR, '')

                # Goods code is the value of the "pno" query parameter.
                after_pno = crw_post_url.split('?pno=')[1]
                product_data.crw_goods_code = after_pno.split('&')[0].strip()

            # Prices.
            # <div class="price">
            #   <p class="consumer consumer">KRW 24,000</p>
            #   <p class="sell sell"><strong>KRW 22,800 </strong></p>
            # </div>
            for price_box_ctx in product_ctx.find_all('div', class_='price'):
                sell_ctx = price_box_ctx.find('p', class_='sell')
                if sell_ctx is not None:
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sell_ctx.get_text().strip()))

                consumer_ctx = price_box_ctx.find('p', class_='consumer')
                if consumer_ctx is not None:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            consumer_ctx.get_text().strip()))

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #22
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Parse one product element of a listing page and submit it.

        Fills a new ``ProductData`` with category, image, sold-out flag,
        name, goods code and prices.  When a product link was found, the
        record is handed to ``set_product_data_sub`` and
        ``process_product_api``.

        Args:
            page_url: Not used by this method.
            soup: Parsed page; the anchors of its ``#sct_location`` element
                supply the category breadcrumb.
            product_ctx: Element wrapping a single product; queried with
                ``find`` / ``find_all``.

        Returns:
            Always ``True``.  Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            # Category breadcrumb: the first anchor is skipped; anchors 2-4
            # become category levels 1-3.
            category_fields = ('crw_category1', 'crw_category2', 'crw_category3')
            for location_ctx in soup.select('#sct_location'):
                crumb_links = location_ctx.find_all('a')
                for field_name, crumb_ctx in zip(category_fields,
                                                 crumb_links[1:]):
                    setattr(product_data, field_name,
                            crumb_ctx.get_text().strip())

            # Product image.
            # <div class="sct_img">
            #   <a href="http://.../shop/item.php?it_id=1585815848"><img src="http://.../thumb-..._310x310.png"></a>
            #   <div class="sct_sns">...share links...</div>
            # </div>
            # Only the first anchor of the block is inspected, so the share
            # icons in div.sct_sns are not picked up.
            for img_box_ctx in product_ctx.find_all('div', class_='sct_img'):
                img_link_ctx = img_box_ctx.find('a')
                if img_link_ctx is None:
                    continue
                for img_ctx in img_link_ctx.find_all('img'):
                    # "data-original" takes precedence over "src" when present.
                    if 'data-original' in img_ctx.attrs:
                        img_src = img_ctx.attrs['data-original'].strip()
                    elif 'src' in img_ctx.attrs:
                        img_src = img_ctx.attrs['src'].strip()
                    else:
                        img_src = ''

                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)

            # Sold-out flag.
            # <div class="sct_icon"><span class="sit_icon"><br><span class="shop_icon_soldout">Sold Out</span></span></div>
            for icon_ctx in product_ctx.find_all('div', class_='sct_icon'):
                if icon_ctx.find('span',
                                 class_='shop_icon_soldout') is not None:
                    product_data.crw_is_soldout = 1

            # Product name / product link / goods code.
            # <div class="sct_txt"><a href="http://.../shop/item.php?it_id=1585815848">name</a></div>
            name_ctx = product_ctx.find('div', class_='sct_txt')
            name_link_ctx = name_ctx.find('a') if name_ctx is not None else None
            if name_link_ctx is not None and 'href' in name_link_ctx.attrs:
                product_data.crw_name = name_link_ctx.get_text().strip()

                # Hrefs that do not start with "http" get the product base URL prefixed.
                href = name_link_ctx.attrs['href'].strip()
                if not href.startswith('http'):
                    href = '%s%s' % (self.BASIC_PRODUCT_URL, href)

                crw_post_url = href
                if self.C_PRODUCT_STRIP_STR != '':
                    crw_post_url = href.replace(self.C_PRODUCT_STRIP_STR, '')

                # Goods code is the value of the "it_id" query parameter.
                after_id = crw_post_url.split('?it_id=')[1].strip()
                product_data.crw_goods_code = after_id.split('&')[0]

            # Prices.
            # <div class="sct_cost">
            #   <span class="sct_discount">70,000won</span>
            #   62,900won
            # </div>
            cost_ctx = product_ctx.find('div', class_='sct_cost')
            if cost_ctx is not None:
                # Look for the discount span INSIDE the cost block: its text
                # length is used below to cut the list price off the front of
                # the cost block's text, which is only valid for a span that
                # belongs to that block (the lookup used to search the whole
                # product element).
                discount_ctx = cost_ctx.find('span', class_='sct_discount')
                if discount_ctx is not None:
                    discount_text = discount_ctx.get_text().strip()
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(discount_text))

                    # What follows the list price in the block is the sale price.
                    cost_text = cost_ctx.get_text().strip()
                    sale_text = cost_text[len(discount_text):].strip()
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(sale_text))
                else:
                    # No discount shown: the whole block is the sale price.
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(cost_ctx.get_text().strip()))

            if crw_post_url != '':
                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #23
0
	def set_product_data(self, page_url, soup, product_ctx):
		"""Parse one product element of a listing page and submit it.

		Fills a new ``ProductData`` with category, brand, image, goods code,
		name and prices.  When a product link was found, the record is handed
		to ``set_product_data_sub`` and ``process_product_api``.

		Args:
			page_url: Forwarded to ``get_category_value``.
			soup: Forwarded to ``get_category_value``.
			product_ctx: Element wrapping a single product; queried with
				``find`` / ``find_all``.

		Returns:
			Always ``True``.  Any exception is logged and swallowed.
		"""
		try:
			product_data = ProductData()
			crw_post_url = ''

			self.reset_product_category(product_data)
			self.get_category_value(product_data, page_url, soup)

			# Brand.
			# <div class="brand">SALLYS LAW</div>
			for brand_ctx in product_ctx.find_all('div', class_='brand'):
				product_data.crw_brand1 = brand_ctx.get_text().strip()

			# Product image.
			# <div class="img">
			#   <img src="//image.wconcept.co.kr/productimg/image/img1/96/300972496.jpg?RS=300" alt="">
			# </div>
			for img_box_ctx in product_ctx.find_all('div', class_='img'):
				for img_ctx in img_box_ctx.find_all('img'):
					# "data-original" takes precedence over "src" when present.
					if 'data-original' in img_ctx.attrs:
						raw_src = img_ctx.attrs['data-original'].strip()
					elif 'src' in img_ctx.attrs:
						raw_src = img_ctx.attrs['src'].strip()
					else:
						raw_src = ''

					# Drop the query string (e.g. "?RS=300").
					img_src = raw_src.split('?')[0].strip()
					if img_src != '':
						img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
						product_data.product_img = self.get_hangul_url_convert(img_link)

			# Sold-out detection (a "soldout" image inside div.item_icon_box)
			# is disabled here.

			# Product link / goods code, from the first anchor of the product.
			# <a href="/Product/300972496">
			link_ctx = product_ctx.find('a')
			if link_ctx is not None and 'href' in link_ctx.attrs:
				# Hrefs that do not start with "http" get the product base URL prefixed.
				href = link_ctx.attrs['href'].strip()
				if not href.startswith('http'):
					href = '%s%s' % (self.BASIC_PRODUCT_URL, href)

				crw_post_url = href
				if self.C_PRODUCT_STRIP_STR != '':
					crw_post_url = href.replace(self.C_PRODUCT_STRIP_STR, '')

				# Goods code is URL segment 4 when splitting on '/'.
				product_data.crw_goods_code = crw_post_url.split('/')[4].strip()

			# Product name.
			for name_ctx in product_ctx.find_all('div', class_='product ellipsis multiline'):
				product_data.crw_name = name_ctx.get_text().strip()

			# Prices.
			# <div class="price">
			#   <span class="discount_price">74,400</span>
			#   <span class="base_price">93,000</span>
			#   <span class="discount_rate">20%</span>
			# </div>
			for price_box_ctx in product_ctx.find_all('div', class_='price'):
				for span_ctx in price_box_ctx.find_all('span'):
					if 'class' not in span_ctx.attrs:
						continue
					first_class = span_ctx.attrs['class'][0]
					if first_class == 'base_price':
						product_data.crw_price = int(__UTIL__.get_only_digit(span_ctx.get_text().strip()))
					elif first_class == 'discount_price':
						product_data.crw_price_sale = int(__UTIL__.get_only_digit(span_ctx.get_text().strip()))

			if crw_post_url != '':
				self.set_product_data_sub(product_data, crw_post_url)
				self.process_product_api(product_data)

		except Exception as ex:
			__LOG__.Error('에러 : set_product_data')
			__LOG__.Error(ex)

		return True
Exemple #24
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Parse one product element of a listing page and submit it.

        Fills a new ``ProductData`` with category, image, name, goods code
        and prices.  When a product link was found and its URL is not yet
        recorded in ``self.PRODUCT_URL_HASH``, the record is handed to
        ``set_product_data_sub`` and ``process_product_api``.

        Args:
            page_url: Key into ``self.PAGE_URL_HASH``; the stored value is
                used as category level 1.
            soup: Not used by this method.
            product_ctx: Element wrapping a single product; queried with
                ``find_all``.

        Returns:
            Always ``True``.  Any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            # Category.
            product_data.crw_category1 = self.PAGE_URL_HASH[page_url]

            # Brand extraction (span.item_brand) is disabled here.

            # Product image.
            for img_box_ctx in product_ctx.find_all('div', class_='goodsimg'):
                for img_ctx in img_box_ctx.find_all('img'):
                    # "data-original" takes precedence over "src" when present.
                    if 'data-original' in img_ctx.attrs:
                        img_src = img_ctx.attrs['data-original'].strip()
                    elif 'src' in img_ctx.attrs:
                        img_src = img_ctx.attrs['src'].strip()
                    else:
                        img_src = ''

                    # A leading ".." is replaced by "/shop".
                    if img_src.startswith('..'):
                        img_src = '/shop%s' % img_src[2:]

                    if img_src != '':
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)

            # Sold-out detection (div.item_icon_box "soldout" image, or
            # strong.item_soldout_bg inside div.item_photo_box) is disabled
            # here.

            # Product link / name / goods code.
            for name_ctx in product_ctx.find_all('div', class_='goods_m_name'):
                for link_ctx in name_ctx.find_all('a'):
                    if 'href' not in link_ctx.attrs:
                        continue

                    product_data.crw_name = link_ctx.get_text().strip()

                    # Hrefs that do not start with "http" get the product base URL prefixed.
                    href = link_ctx.attrs['href'].strip()
                    if not href.startswith('http'):
                        href = '%s%s' % (self.BASIC_PRODUCT_URL, href)

                    crw_post_url = href
                    if self.C_PRODUCT_STRIP_STR != '':
                        crw_post_url = href.replace(self.C_PRODUCT_STRIP_STR,
                                                    '')

                    # Goods code is the value of the "goodsno" query parameter.
                    after_goodsno = crw_post_url.split('?goodsno=')[1].strip()
                    product_data.crw_goods_code = after_goodsno.split(
                        '&')[0].strip()

            # Prices: every <div> of the product is inspected; a <b> gives
            # the sale price and a <strike> the list price.  Later divs
            # overwrite earlier ones.
            for div_ctx in product_ctx.find_all('div'):
                bold_ctx = div_ctx.find('b')
                strike_ctx = div_ctx.find('strike')
                if bold_ctx is not None:
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(bold_ctx.get_text().strip()))
                if strike_ctx is not None:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(strike_ctx.get_text().strip()))

            # Submit only URLs with no entry in PRODUCT_URL_HASH (-1 is the
            # "missing" sentinel).
            if crw_post_url != '':
                if self.PRODUCT_URL_HASH.get(crw_post_url, -1) == -1:
                    self.set_product_data_sub(product_data, crw_post_url)
                    self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #25
0
	def set_product_data(self, page_url, soup, product_ctx):
		"""Parse one product element of a listing page and submit it.

		Fills a new ``ProductData`` with image, goods code, category, name,
		brand, sold-out flag and prices.  When a product link was found, the
		record is handed to ``set_product_data_sub`` and
		``process_product_api``.

		Args:
			page_url: Not used by this method.
			soup: Not used by this method.
			product_ctx: Element wrapping a single product; queried with
				``find_all``.

		Returns:
			Always ``True``.  Any exception is logged and swallowed.
		"""
		try:
			product_data = ProductData()
			crw_post_url = ''

			# Image, product link, goods code and category - all from the thumbnail.
			# <dt class="thumb"><a href="/shop/shopdetail.html?branduid=10163894&amp;xcode=001&amp;...">
			#   <img class="MS_prod_img_m" src="/shopimages/dermadog/0010050000192.jpg?1591754112"></a></dt>
			for thumb_ctx in product_ctx.find_all('dt', class_='thumb'):
				# First <img> with a non-empty src (query string removed) wins.
				for img_ctx in thumb_ctx.find_all('img'):
					if 'src' not in img_ctx.attrs:
						continue
					img_src = img_ctx.attrs['src'].strip().split('?')[0].strip()
					if img_src != '':
						img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
						product_data.product_img = self.get_hangul_url_convert(img_link)
						break

				# Goods code and category are derived from the product URL.
				link_ctx = thumb_ctx.find('a')
				if link_ctx is not None and 'href' in link_ctx.attrs:
					crw_post_url = self.get_crw_post_url(link_ctx, 'href')
					if crw_post_url != '':
						self.get_crw_goods_code(product_data, crw_post_url)
						self.get_category_value(product_data, crw_post_url)

			# Product name and brand.
			# <dd class="prd-info"><ul>
			#   <li class="prd-brand"><span class="MK-product-icons">...</span></li>
			#   <li class="prd-name"><a href="/shop/shopdetail.html?...">name</a></li>
			# </ul></dd>
			for info_ctx in product_ctx.find_all('dd', class_='prd-info'):
				name_ctx = info_ctx.find('li', class_='prd-name')
				if name_ctx is not None:
					product_data.crw_name = name_ctx.get_text().strip()

				brand_ctx = info_ctx.find('li', class_='prd-brand')
				if brand_ctx is not None:
					product_data.crw_brand1 = brand_ctx.get_text().strip()

			# Prices / sold-out flag.
			# <p class="price-info">
			#   <strike>10,000</strike><br>
			#   <span class="won">W</span><span class="price">9,000</span>
			# </p>
			# When sold out the paragraph contains only the text "Sold Out".
			for price_info_ctx in product_ctx.find_all('p', class_='price-info'):
				if 'Out' in price_info_ctx.get_text().strip():
					product_data.crw_is_soldout = 1

				sell_ctx = price_info_ctx.find('span', class_='price')
				consumer_ctx = price_info_ctx.find('strike')

				if consumer_ctx is not None:
					product_data.crw_price = int(__UTIL__.get_only_digit(consumer_ctx.get_text().strip()))
				if sell_ctx is not None:
					product_data.crw_price_sale = int(__UTIL__.get_only_digit(sell_ctx.get_text().strip()))

			if crw_post_url != '':
				self.set_product_data_sub(product_data, crw_post_url)
				self.process_product_api(product_data)

		except Exception as ex:
			__LOG__.Error('에러 : set_product_data')
			__LOG__.Error(ex)

		return True
Exemple #26
0
	def set_product_data(self, page_url, soup, product_ctx):
		"""Extract one listed product from ``product_ctx`` and submit it.

		Reads the category breadcrumb from ``soup`` and the goods code,
		detail-page URL, image, name and prices from ``product_ctx``. When a
		detail-page URL could be built, the record is passed to
		``set_product_data_sub`` and ``process_product_api``.

		Args:
			page_url: URL of the listing page; not used by this scraper.
			soup: Parsed listing page, queried for the category breadcrumb.
			product_ctx: Parsed markup node of a single product entry.

		Returns:
			Always ``True``; any exception is logged and swallowed.
		"""
		try:
			product_data = ProductData()
			crw_post_url = ''

			self.reset_product_category(product_data)

			# Category breadcrumb: the text is '>'-separated. The first
			# segment is ignored; segments 2-4 fill category levels 1-3.
			for category_ctx in soup.select('#layout_config_full > div.category_depth.clearbox > ul'):
				segments = category_ctx.get_text().strip().split('>')
				for depth, segment in enumerate(segments, start=1):
					category_name = segment.strip()
					if depth == 2:
						product_data.crw_category1 = category_name
					elif depth == 3:
						product_data.crw_category2 = category_name
					elif depth == 4:
						product_data.crw_category3 = category_name

			####################################
			# Product image / product link / goods code
			#
			# <a href="javascript:void(0)" onclick="display_goods_view('196','',this,'goods_view')">...</a>
			####################################
			for wrap_ctx in product_ctx.find_all('div', class_='goodsDisplayImageWrap'):
				product_link_ctx = wrap_ctx.find('a')
				if product_link_ctx is None:
					continue

				if 'onclick' in product_link_ctx.attrs:
					# The first argument of display_goods_view(...) is the goods code.
					split_list = product_link_ctx.attrs['onclick'].split('display_goods_view(')
					sub_split_list = split_list[1].split(',')
					product_data.crw_goods_code = sub_split_list[0].replace("'", "").strip()

					relative_link = '/goods/view?no=' + product_data.crw_goods_code
					tmp_product_link = self.SITE_HOME + relative_link
					if not tmp_product_link.startswith('http'):
						# SITE_HOME carries no scheme: fall back to
						# BASIC_PRODUCT_URL. The link is rebuilt from the goods
						# code because the anchor's href in the sample markup is
						# "javascript:void(0)" and cannot be used as a path.
						tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL, relative_link)

					crw_post_url = tmp_product_link
					if self.C_PRODUCT_STRIP_STR != '':
						crw_post_url = tmp_product_link.replace(self.C_PRODUCT_STRIP_STR, '')

				# Prefer the lazy-load attribute over src. There is no break,
				# so the last usable image inside the anchor wins.
				for img_ctx in product_link_ctx.find_all('img'):
					if 'data-original' in img_ctx.attrs:
						img_src = img_ctx.attrs['data-original'].strip()
					elif 'src' in img_ctx.attrs:
						img_src = img_ctx.attrs['src'].strip()
					else:
						img_src = ''

					if img_src != '':
						img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
						product_data.product_img = self.get_hangul_url_convert(img_link)

			####################################
			# Product name
			#
			# <span ... class="goods_name">name line 1 <br>name line 2</span>
			####################################
			name_ctx = product_ctx.find('span', class_='goods_name')
			if name_ctx is not None:
				product_data.crw_name = name_ctx.get_text().replace('\n', ' ').strip()

			####################################
			# Price
			#
			# <li>
			# <span class="price_txt">판매가</span>
			# <span ... class="sale_price">15,000</span>
			# </li>
			####################################
			for li_ctx in product_ctx.find_all('li'):
				title_ctx = li_ctx.find('span', class_='price_txt')
				value_ctx = li_ctx.find('span', class_='sale_price')
				if title_ctx is None or value_ctx is None:
					continue

				title_name = title_ctx.get_text().strip()
				title_value = value_ctx.get_text().strip()
				# The label text decides which price field the value fills.
				if title_name == '판매가':
					product_data.crw_price = int(__UTIL__.get_only_digit(title_value))
				elif title_name == '이벤트가':
					product_data.crw_price_sale = int(__UTIL__.get_only_digit(title_value))

			# Submit only when a detail-page URL was found.
			if crw_post_url != '':
				self.set_product_data_sub(product_data, crw_post_url)
				self.process_product_api(product_data)

		except Exception as ex:
			__LOG__.Error('에러 : set_product_data')
			__LOG__.Error(ex)

		return True
Exemple #27
0
    def set_product_data(self, page_url, soup, product_ctx):

        #
        #
        try:
            product_data = ProductData()
            crw_post_url = ''

            self.reset_product_category(product_data)

            ####################################
            # 상품 카테고리 추출
            ####################################
            __LOG__.Trace(self.PAGE_URL_HASH[page_url])

            split_list = self.PAGE_URL_HASH[page_url].split('|')
            idx = 0
            for split_data in split_list:
                idx += 1
                if (idx == 1): product_data.crw_category1 = split_data
                elif (idx == 2): product_data.crw_category2 = split_data
                elif (idx == 3): product_data.crw_category3 = split_data
            '''
			div_list = soup.find_all( 'div' , class_='cntbody' )
			for div_ctx in div_list :				
				category_list = div_ctx.find_all( 'h2', class_='subtitle' )
				for category_ctx in category_list :
					product_data.crw_category1 = category_ctx.get_text().strip()
			'''
            '''			
			####################################
			# 브랜드 추출	
			####################################
			brand_div_list = product_ctx.find_all('span', class_='item_brand')
			for brand_ctx in brand_div_list :
				brand_name = brand_ctx.get_text().strip()
				if( brand_name != '') : product_data.crw_brand1 = brand_name.replace('[','').replace(']','').strip()
			'''

            ####################################
            # 상품 이미지 확인
            #
            # <div class="prdimg"><a href="https://www.howlpot.com/shop/detail.php?pno=41AE36ECB9B3EEE609D05B90C14222FB&amp;rURL=https%3A%2F%2Fwww.howlpot.com%2Fshop%2Fbig_section.php%3Fcno1%3D1037&amp;ctype=1&amp;cno1=1037"><img src="https://howlpotdesign.wisacdn.com/_data/product/d0dcc887757a47bd539823e77b7a3da6.jpg" width="292" height="292"></a></div>
            #
            ####################################

            img_div_list = product_ctx.find_all('div', class_='prdimg')
            for img_div_ctx in img_div_list:
                img_ctx = img_div_ctx.find('img')

                #for img_ctx in img_list :
                if (img_ctx != None):
                    img_src = ''
                    if ('src' in img_ctx.attrs):
                        img_src = img_ctx.attrs['src'].strip()

                    if (img_src != ''):
                        img_link = self.set_img_url(self.BASIC_IMAGE_URL,
                                                    img_src)
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)

            ####################################
            # 품절여부 추출
            # 품절시 <div class="box out"> 으로 표현됨
            #
            # <div class="box out">
            # <div class="no">03</div>
            # <div class="img">
            # 생략
            # </div>
            # <div class="info">
            # 생략
            # </div>
            # </div>
            #
            ####################################

            if ('class' in product_ctx.attrs):
                class_name_list = product_ctx.attrs['class']
                if (len(class_name_list) == 2):
                    if (class_name_list[1] == 'out'):
                        product_data.crw_is_soldout = 1

            ####################################
            # 상품 링크 정보 및 상품명 / 상품코드
            #
            # <div class="name">
            # <a href="https://www.howlpot.com/shop/detail.php?pno=41AE36ECB9B3EEE609D05B90C14222FB&amp;rURL=https%3A%2F%2Fwww.howlpot.com%2Fshop%2Fbig_section.php%3Fcno1%3D1037&amp;ctype=1&amp;cno1=1037">메모리폼_라이트 그레이</a>
            # <span class="wish"><a href="#" onclick="wishPartCartAjax(&quot;41AE36ECB9B3EEE609D05B90C14222FB&quot;, this); return false;">관심상품 담기</a></span>
            # </div>
            #
            ####################################
            name_strong_list = product_ctx.find_all('div', class_='name')
            for name_strong_ctx in name_strong_list:
                product_link_ctx = name_strong_ctx.find('a')
                if (product_link_ctx != None):
                    #__LOG__.Trace( product_link_ctx )
                    if ('href' in product_link_ctx.attrs):
                        product_data.crw_name = product_link_ctx.get_text(
                        ).strip()

                        tmp_product_link = product_link_ctx.attrs[
                            'href'].strip()
                        if (0 != tmp_product_link.find('http')):
                            tmp_product_link = '%s%s' % (
                                self.BASIC_PRODUCT_URL,
                                product_link_ctx.attrs['href'].strip())
                        crw_post_url = tmp_product_link

                        if (self.C_PRODUCT_STRIP_STR != ''):
                            crw_post_url = tmp_product_link.replace(
                                self.C_PRODUCT_STRIP_STR, '')

                        split_list = crw_post_url.split('?pno=')
                        second_split_list = split_list[1].split('&')
                        product_data.crw_goods_code = second_split_list[
                            0].strip()

            ####################################
            # 가격
            #
            # <div class="price">
            # <span class="sell"><span class="font">98,000</span></span>
            # </div>
            #
            ####################################

            div_list = product_ctx.find_all('div', class_='price')
            for div_ctx in div_list:
                sell_ctx = div_ctx.find('span', class_='sell')
                consumer_ctx = div_ctx.find('span', class_='consumer')
                if (consumer_ctx != None):
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(
                            consumer_ctx.get_text().strip()))

                if (sell_ctx != None):
                    # 타임세일일때  뒷부분의 별도의 값이 붙어서, 값 이상 문제 해결법,
                    crw_price_sale = sell_ctx.get_text().strip().split('\n')
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(crw_price_sale[0].strip()))

            if (crw_post_url != ''):
                #if( self.PRODUCT_URL_HASH.get( crw_post_url , -1) == -1) :

                self.set_product_data_sub(product_data, crw_post_url)
                self.process_product_api(product_data)

                rtn = True

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)
            pass

        return True
Exemple #28
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a product record from one listing entry and register it.

        The category is filled by ``set_product_category_third`` from
        ``soup``; link, goods code, image, sold-out flag, brand, name and
        prices come from ``product_ctx``. ``page_url`` is not used. When a
        detail-page URL was found the record is passed to
        ``set_product_url_hash``.

        Returns:
            Always ``True``; any exception is logged and swallowed.
        """
        try:
            product_data = ProductData()
            crw_post_url = ''

            # Product category.
            self.set_product_category_third(product_data, soup)

            # Product link / goods code / image:
            # <a href="/product/detail.html?product_no=417&cate_no=29" name="anchorBoxName_417">
            #   <img src="..." data-original="//.../417_shop1_670038.jpg?cmd=thumb" class="thumb"></a>
            for anchor_ctx in product_ctx.find_all('a'):
                if 'name' not in anchor_ctx.attrs:
                    continue
                if 'anchorBoxName_' not in anchor_ctx.attrs['name']:
                    continue

                href = anchor_ctx.attrs['href'].strip()
                if href.startswith('http'):
                    product_link = href
                else:
                    product_link = '%s%s' % (self.BASIC_PRODUCT_URL, href)

                if self.C_PRODUCT_STRIP_STR != '':
                    crw_post_url = product_link.replace(
                        self.C_PRODUCT_STRIP_STR, '')
                else:
                    crw_post_url = product_link

                # Goods code is the value of the 'product_no' parameter.
                after_no = crw_post_url.split('?product_no=')[1].strip()
                product_data.crw_goods_code = after_no.split('&')[0].strip()

                img_ctx = anchor_ctx.find('img')
                if img_ctx is None or 'data-original' not in img_ctx.attrs:
                    continue

                img_src = img_ctx.attrs['data-original'].strip()
                if img_src != '':
                    img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
                    # Keep the first image found; later anchors do not
                    # overwrite it.
                    if product_data.product_img == '':
                        product_data.product_img = self.get_hangul_url_convert(
                            img_link)

            # Sold-out flag.
            self.set_product_soldout_first(product_data, product_ctx)

            # Brand / name / prices, keyed by the first class of each <li>:
            # <ul class="xans-product-listitem">
            # <li class="manu">brand</li>
            # <li class="second">name</li>
            # <li class="custom">25,000 WON</li>
            # <li class="price">15,800 WON</li>
            # </ul>
            for li_ctx in product_ctx.find_all('li'):
                if 'class' not in li_ctx.attrs:
                    continue

                li_text = li_ctx.get_text().strip()
                li_kind = li_ctx.attrs['class'][0]

                if li_kind == 'manu':
                    product_data.crw_brand1 = li_text
                elif li_kind == 'second':
                    product_data.crw_name = li_text
                elif li_kind == 'custom':
                    # A zero list price is ignored.
                    if int(__UTIL__.get_only_digit(li_text)) != 0:
                        product_data.crw_price = int(
                            __UTIL__.get_only_digit(li_text))
                elif li_kind == 'price':
                    product_data.crw_price_sale = int(
                        __UTIL__.get_only_digit(li_text))

            if crw_post_url != '':
                self.set_product_url_hash(product_data, crw_post_url)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #29
0
    def set_product_data(self, page_url, soup, product_ctx):
        """Build a product record from one listing entry and submit it.

        The category is the text of the first breadcrumb link selected from
        ``soup``. The product is processed only when
        ``check_ignore_category_text`` returns a truthy value for it. Image,
        sold-out flag, name, detail URL, goods code and price come from
        ``product_ctx``. A product whose URL is not yet a key of
        ``self.PRODUCT_URL_HASH`` is passed to ``set_product_data_sub`` and
        ``process_product_api``.

        Args:
            page_url: URL of the listing page; only used in a log message.
            soup: Parsed listing page, queried for the category link.
            product_ctx: Parsed markup node of a single product entry.

        Returns:
            Always ``True``; any exception is logged and swallowed.
        """
        try:
            crw_post_url = ''

            ####################################
            # Product category
            ####################################
            category_list = soup.select(
                'body > table > tr > td > table > tr > td.outline_side > div.indiv > form > table> tr > td > b > a'
            )
            if not category_list:
                # Without a category the product cannot be validated; skip it
                # explicitly instead of failing on an unbound variable.
                __LOG__.Error('에러 : set_product_data')
                __LOG__.Error('category link not found : %s' % page_url)
                return True
            crw_category = category_list[0].get_text().strip()

            # Proceed only when the category check accepts this category.
            if not self.check_ignore_category_text(crw_category):
                return True

            product_data = ProductData()
            product_data.crw_category1 = crw_category

            ####################################
            # Product image (first <img> of the entry)
            ####################################
            first_img_ctx = product_ctx.find('img')
            if first_img_ctx is not None:
                if 'data-original' in first_img_ctx.attrs:
                    img_src = first_img_ctx.attrs['data-original'].strip()
                elif 'src' in first_img_ctx.attrs:
                    img_src = first_img_ctx.attrs['src'].strip()
                else:
                    img_src = ''

                if img_src != '':
                    tmp_img_link = self.BASIC_IMAGE_URL + '/shop' + img_src
                    img_link = tmp_img_link.replace('..', '')
                    product_data.product_img = self.get_hangul_url_convert(
                        img_link)

            ####################################
            # Sold-out flag
            # <img src="/shop/data/skin/freemart/img/icon/good_icon_soldout.gif">
            ####################################
            # find_all() is required here: find() returns a single tag (or
            # None), so iterating its result never visits the icon images.
            for img_ctx in product_ctx.find_all('img'):
                if 'src' in img_ctx.attrs:
                    if 'soldout' in img_ctx.attrs['src'].strip():
                        product_data.crw_is_soldout = 1

            ####################################
            # Product link, name and goods code
            ####################################
            is_product_name = True
            is_product_link = True

            for product_link_ctx in product_ctx.find_all('a'):
                product_name = product_link_ctx.get_text().strip()

                # The name is the text of the first anchor that has any text.
                if is_product_name and product_name != '':
                    product_data.crw_name = product_name
                    is_product_name = False

                if not is_product_link:
                    continue
                if 'href' not in product_link_ctx.attrs:
                    continue

                tmp_product_link = product_link_ctx.attrs['href'].strip()
                # Skip javascript: pseudo-links; the first real href wins.
                if tmp_product_link.find('javascript') >= 0:
                    continue

                if not tmp_product_link.startswith('http'):
                    tmp_product_link = '%s%s' % (self.BASIC_PRODUCT_URL,
                                                 tmp_product_link)
                crw_post_url = tmp_product_link

                if self.C_PRODUCT_STRIP_STR != '':
                    crw_post_url = tmp_product_link.replace(
                        self.C_PRODUCT_STRIP_STR, '')

                # Goods code is the value of the 'goodsno' query parameter.
                split_list = crw_post_url.split('?goodsno=')
                sub_split_list = split_list[1].strip().split('&')
                product_data.crw_goods_code = sub_split_list[0].strip()
                is_product_link = False

            ####################################
            # Price: the <b> inside a <div>; the last matching div wins
            ####################################
            for div_ctx in product_ctx.find_all('div'):
                cost_ctx = div_ctx.find('b')
                if cost_ctx is not None:
                    product_data.crw_price = int(
                        __UTIL__.get_only_digit(cost_ctx.get_text().strip()))

            # Submit only URLs that are not yet registered.
            if crw_post_url != '':
                if self.PRODUCT_URL_HASH.get(crw_post_url, -1) == -1:
                    self.set_product_data_sub(product_data, crw_post_url)
                    self.process_product_api(product_data)

        except Exception as ex:
            __LOG__.Error('에러 : set_product_data')
            __LOG__.Error(ex)

        return True
Exemple #30
0
	def set_product_data(self, page_url, soup, product_ctx):
		"""Build a product record from one listing entry and submit it.

		The category is filled by ``get_category_value`` from ``page_url``
		and ``soup``; image, goods code, sold-out flag, name, detail URL and
		prices come from ``product_ctx``. When a detail-page URL was found,
		the record is passed to ``set_product_data_sub`` and
		``process_product_api``.

		Args:
			page_url: URL of the listing page, forwarded to get_category_value.
			soup: Parsed listing page, forwarded to get_category_value.
			product_ctx: Parsed markup node of a single product entry.

		Returns:
			Always ``True``; any exception is logged and swallowed.
		"""
		try:
			product_data = ProductData()
			crw_post_url = ''

			self.reset_product_category(product_data)

			####################################
			# Product category
			####################################
			self.get_category_value(product_data, page_url, soup)

			####################################
			# Product image and goods code
			#
			# <div class="prdImg  scroll-fade">
			#     <a href="/product/.../57/category/61/display/1/" name="anchorBoxName_57">
			#         <img src="//eledog.co.kr/web/product/medium/202011/74b4...jpg" class="thumb_Img" alt="..."></a>
			# </div>
			####################################
			img_wrap_ctx = product_ctx.find('div', class_='prdImg')
			# find() returns None when nothing matches; a missing wrapper or
			# thumbnail leaves the image / goods code unset instead of
			# aborting the whole product.
			if img_wrap_ctx is not None:
				thumb_ctx = img_wrap_ctx.find('img', class_='thumb_Img')
				if thumb_ctx is not None and 'src' in thumb_ctx.attrs:
					img_src = thumb_ctx.attrs['src'].strip()
					if img_src != '':
						img_link = self.set_img_url(self.BASIC_IMAGE_URL, img_src)
						product_data.product_img = self.get_hangul_url_convert(img_link)

				anchor_ctx = img_wrap_ctx.find('a')
				if anchor_ctx is not None and 'name' in anchor_ctx.attrs:
					anchor_name = anchor_ctx.attrs['name']
					# name="anchorBoxName_<code>" carries the goods code.
					if 'anchorBoxName_' in anchor_name:
						product_data.crw_goods_code = anchor_name.replace('anchorBoxName_', '')

			####################################
			# Sold-out flag
			#
			# <div class="promotion"><img src="//.../ico_product_soldout.gif" class="icon_img" alt="품절"></div>
			####################################
			if product_ctx.find_all('img', alt='품절'):
				product_data.crw_is_soldout = 1

			####################################
			# Product name and detail-page link
			#
			# <div class="description" onclick="...">
			#   <div class="inner">
			#     <strong class="name"><a href="/product/.../43/category/42/display/1/" class=""><span ...>name</span></a></strong>
			#     <ul class="spec"> ... <li class="price_all"> ... </li></ul>
			#   </div>
			# </div>
			####################################
			for desc_ctx in product_ctx.find_all('div', class_='description'):
				product_link_ctx = desc_ctx.find('a')
				if product_link_ctx is None or 'href' not in product_link_ctx.attrs:
					continue

				# The link is the first anchor of the description block; the
				# name is the text of each <strong>, so the last one wins.
				for strong_ctx in desc_ctx.find_all('strong'):
					product_data.crw_name = strong_ctx.get_text().strip()
					crw_post_url = self.get_crw_post_url(product_link_ctx, 'href')

			####################################
			# Price
			#
			# <li class="price_all">
			#     <span class="custom ">188,000원</span>
			#     <span class="price  displaynone"><span class="strike">149,000원</span><span class="pri">149,000원</span></span>
			#     <span class="sale displaynone"></span>
			# </li>
			####################################
			for price_ctx in product_ctx.find_all('li', class_='price_all'):
				# Scan the spans of this price item itself, not a variable
				# left over from the description loop above.
				for span_ctx in price_ctx.find_all('span'):
					class_name_list = span_ctx.attrs.get('class')
					if not class_name_list:
						continue

					if class_name_list[0] == 'custom':
						product_data.crw_price = int(__UTIL__.get_only_digit(span_ctx.get_text().strip()))
					elif class_name_list[0] == 'pri':
						product_data.crw_price_sale = int(__UTIL__.get_only_digit(span_ctx.get_text().strip()))

			# Submit only when a detail-page URL was found.
			if crw_post_url != '':
				self.set_product_data_sub(product_data, crw_post_url)
				self.process_product_api(product_data)

		except Exception as ex:
			__LOG__.Error('에러 : set_product_data')
			__LOG__.Error(ex)

		return True