Python Product.dept 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: models

클래스/타입: Product

메소드/함수: dept

hotexamples.com에서의 예제들: 3

Python Product.dept - 3개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python models.Product.dept의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Product(30)

get(23)

description(22)

find(22)

create(20)

get_by_key_name(13)

delete(10)

category(9)

configure(9)

all(9)

deserialize(6)

brand(6)

combine_url(5)

list_update_time(5)

listprice(5)

hit_time(4)

find_by_name(4)

event_type(4)

find_by_category(4)

getProductsList(3)

dept(3)

key(3)

format(3)

create_table(3)

code(3)

available(3)

reserve(3)

barcodes(3)

deep(2)

first(2)

firstThumbDominantColor(2)

drop_table(2)

create_by_barcode(2)

model(1)

full_update_time(1)

site(1)

sizes(1)

soldout(1)

generate_create_sql(1)

split_and_search(1)

from_list(1)

set_options(1)

from_dict(1)

stock(1)

subcategory_desc(1)

package_size(1)

prepare_price(1)

summary(1)

shipping(1)

get_price(1)

예제 #1

파일 보기

파일: server.py 프로젝트: mobishift2011/amzn

    def crawl_listing(self, url, ctx='', **kwargs):
        """Crawl one Nordstrom listing page, upsert its products, then recurse.

        The page is requested sorted by sale. Each product node that exposes
        both a regular and a sale price is merged into the ``Product``
        document looked up by the node id, passed through ``Picker`` and,
        when picked, saved and announced through ``common_saved``. Afterwards
        the pager's "next" link is followed recursively unless it is disabled.

        Args:
            url: Listing page URL. URLs on blogs.nordstrom.com are ignored.
            ctx: Sender attached to every ``common_failed`` /
                ``common_saved`` signal.
            **kwargs: ``key`` is read from here to look up the ``Category``;
                everything is forwarded unchanged to the recursive call and
                to ``crawl_listing_of_no_leaf``.

        Raises:
            requests.exceptions.HTTPError: Propagated from
                ``res.raise_for_status()`` on an error status code.
        """
        if url.startswith('http://blogs.nordstrom.com'):
            return
        try:
            res = requests.get(url, params={'sort': 'sale'})
        except requests.exceptions.ConnectionError:
            return

        res.raise_for_status()
        tree = lxml.html.fromstring(res.content)
        listing_node = tree.cssselect('div.fashion-results')

        if listing_node:
            listing_node = listing_node[0]
        else:
            # A brands index page has no listing to crawl at all.
            if tree.cssselect('div#brandsIndex'):
                return

            self.crawl_listing_of_no_leaf(tree, ctx=ctx, **kwargs)
            return

        product_nodes = listing_node.cssselect('div.row div.fashion-item')
        if not product_nodes:
            self.crawl_listing_of_no_leaf(tree, ctx=ctx, **kwargs)
            return

        category_key = kwargs.get('key')
        category = Category.objects(key=category_key).first()
        if not category:
            # Every product below dereferences the category, so report the
            # problem once instead of failing with AttributeError mid-loop.
            common_failed.send(sender=ctx, url=url, reason='category %s not found in db' % category_key)
            return

        # Number of consecutive products without a discount. The listing is
        # ordered by sale, but a non-discounted product sometimes shows up
        # between discounted ones, so only a run of three stops the crawl.
        no_discount_num = 0
        for product_node in product_nodes:
            key = product_node.get('id')
            if not key:
                common_failed.send(sender=ctx, url=url, reason='listing product has no id')
                continue

            try:
                info_node = product_node.cssselect('div.info')[0]
                a_node = info_node.cssselect('a')[0]
                # Raises AttributeError when the link has no direct text;
                # handled below together with the missing-node case.
                title = a_node.text.strip()

                price = None
                listprice = None
                for price_node in info_node.cssselect('.price'):
                    node_class = price_node.get('class')
                    if 'regular' in node_class:
                        listprice = price_node.text
                    elif 'sale' in node_class:
                        price = price_node.text

                if price is None or listprice is None:
                    no_discount_num += 1
                    if no_discount_num < 3:
                        continue
                    return
                # A discounted product ends the current run.
                no_discount_num = 0

                combine_url = a_node.get('href')
                if not combine_url:
                    common_failed.send(sender=ctx, url=url, reason='listing product %s.%s cannot crawl combine_url' % (key, title))
                    continue

                # Relative links are resolved against the shop host.
                if not re.search(r'https?://.+', combine_url):
                    combine_url = 'http://shop.nordstrom.com%s' % (combine_url)

            except (IndexError, AttributeError):
                failure = traceback.format_exc()
                print(failure)
                common_failed.send(sender=ctx, url=url, reason='listing product %s -> %s' % (key, failure))
                continue

            is_new = False
            is_updated = False
            product = Product.objects(key=key).first()
            if not product:
                is_new = True
                product = Product(key=key)
                product.updated = False
                product.event_type = False

            if combine_url and combine_url != product.combine_url:
                product.combine_url = combine_url
                is_updated = True

            if title and title != product.title:
                product.title = title
                is_updated = True

            if price and price != product.price:
                product.price = price
                is_updated = True

            if listprice and listprice != product.listprice:
                product.listprice = listprice
                is_updated = True

            # Treat a missing dept as empty in the comparison as well as in
            # the merge, so a None value cannot raise TypeError here.
            known_depts = set(product.dept or [])
            if category.cats and set(category.cats).difference(known_depts):
                product.dept = list(set(category.cats) | known_depts)
                is_updated = True

            if category.key not in product.category_key:
                product.category_key.append(category.key)
                is_updated = True

            if is_updated:
                product.list_update_time = datetime.utcnow()

            # To pick the product which fit our needs, such as a certain discount, brand, dept etc.
            selected = Picker(site='nordstrom').pick(product)
            if not selected:
                continue

            product.hit_time = datetime.utcnow()
            product.save()

            common_saved.send(sender=ctx, obj_type='Product', key=product.key, url=product.combine_url,
                is_new=is_new, is_updated=((not is_new) and is_updated))

        # Go to the next page to keep on crawling.
        try:
            arrow_node = tree.cssselect('div.fashion-results-header div.fashion-results-pager ul.arrows li.next')[0]
        except IndexError:
            common_failed.send(sender=ctx, url=url, reason=traceback.format_exc())
            return

        next_page = None
        if 'disabled' not in arrow_node.get('class'):
            # An enabled arrow without a link simply ends the pagination.
            next_links = arrow_node.cssselect('a')
            if next_links:
                next_page = next_links[0].get('href')

        if next_page:
            print(next_page)
            self.crawl_listing(url=next_page, ctx=ctx, **kwargs)

예제 #2

파일 보기

파일: server.py 프로젝트: mobishift2011/amzn

    def crawl_listing(self, url, ctx='', **kwargs):
        """Crawl one 6pm listing page, upsert its products, then recurse.

        Every anchor under ``div#searchResults`` that carries both a
        ``.price-6pm`` and a ``.discount`` node is merged into the ``Product``
        document keyed by ``<data-product-id>_<last URL segment>``, passed
        through ``Picker`` and, when picked, saved and announced through
        ``common_saved``. Afterwards the last link of the pager is followed
        recursively when the pager contains a ``.last`` element.

        Args:
            url: Listing page URL.
            ctx: Sender attached to every ``common_failed`` /
                ``common_saved`` signal.
            **kwargs: ``key`` is read from here to look up the ``Category``;
                everything is forwarded unchanged to the recursive call.

        Raises:
            requests.exceptions.HTTPError: Propagated from
                ``res.raise_for_status()`` on an error status code.
        """
        res = requests.get(url)
        res.raise_for_status()
        tree = lxml.html.fromstring(res.content)

        category = Category.objects(key=kwargs.get('key')).first()
        if not category:
            common_failed.send(sender=ctx, url=url, reason='category %s not found in db' % kwargs.get('key'))
            return

        for product_node in tree.cssselect('div#searchResults a'):
            # An anchor without a price node is treated like a product
            # without a discount instead of aborting the crawl with IndexError.
            price_nodes = product_node.cssselect('.price-6pm')
            price = price_nodes[0].text if price_nodes else None
            listprice_node = product_node.cssselect('.discount')
            listprice = ''.join(listprice_node[0].xpath('text()')) if listprice_node else None

            # Skip products that carry no discount.
            if price is None or listprice is None:
                continue

            key = product_node.get('data-product-id')
            if not key:
                common_failed.send(sender=ctx, url=url, reason='listing product has no key')
                continue

            combine_url = product_node.get('href')
            if not combine_url:
                # The href is needed both for the key and for the product URL.
                common_failed.send(sender=ctx, url=url, reason='listing product %s has no href' % key)
                continue

            key = '%s_%s' % (key, combine_url.split('/')[-1])
            # Relative links are resolved against the site host.
            if not re.search(r'https?://.+', combine_url):
                combine_url = '%s%s' % (HOST, combine_url)

            try:
                brand = product_node.cssselect('.brandName')[0].text.strip()
                title = product_node.cssselect('.productName')[0].text.strip()
            except (IndexError, AttributeError):
                # Node missing (IndexError) or node without direct text
                # (AttributeError on None): report and move on.
                common_failed.send(sender=ctx, url=url, reason='listing product %s cannot crawl brand or title' % key)
                continue

            is_new = False
            is_updated = False
            product = Product.objects(key=key).first()
            if not product:
                is_new = True
                product = Product(key=key)
                product.updated = False
                product.event_type = False

            if title and title != product.title:
                product.title = title
                is_updated = True

            if brand and brand != product.brand:
                product.brand = brand
                is_updated = True

            if combine_url and combine_url != product.combine_url:
                product.combine_url = combine_url
                is_updated = True

            if price and price != product.price:
                product.price = price
                is_updated = True

            if listprice and listprice != product.listprice:
                product.listprice = listprice
                is_updated = True

            # Treat a missing dept as empty in the comparison as well as in
            # the merge, so a None value cannot raise TypeError here.
            known_depts = set(product.dept or [])
            if category.cats and set(category.cats).difference(known_depts):
                product.dept = list(set(category.cats) | known_depts)
                is_updated = True

            if category.key not in product.category_key:
                product.category_key.append(category.key)
                is_updated = True

            if is_updated:
                product.list_update_time = datetime.utcnow()

            # To pick the product which fit our needs, such as a certain discount, brand, dept etc.
            selected = Picker(site='6pm').pick(product)
            if not selected:
                continue

            product.hit_time = datetime.utcnow()
            product.save()

            common_saved.send(sender=ctx, obj_type='Product', key=product.key, url=product.combine_url,
                is_new=is_new, is_updated=((not is_new) and is_updated))

            # Progress output, one field per line followed by a blank line.
            print(product.key)
            print(product.brand)
            print(product.title)
            print('%s  /  %s' % (product.price, product.listprice))
            print(product.combine_url)
            print(product.dept)
            print('')

        # Go to the next page to keep on crawling.
        page_node = tree.cssselect('div.pagination')
        if not page_node:
            return

        next_page = None
        if page_node[0].cssselect('.last'):
            # The last link inside the pager is taken as the next page.
            pager_links = page_node[0].cssselect('a')
            if pager_links:
                next_page = pager_links[-1].get('href')

        if next_page:
            if not re.search(r'https?://.+', next_page):
                next_page = '%s%s' % (HOST, next_page)
            print(next_page)
            self.crawl_listing(url=next_page, ctx=ctx, **kwargs)

예제 #3

파일 보기

파일: server.py 프로젝트: mobishift2011/amzn

    def crawl_listing(self, url, ctx='', **kwargs):
        """Crawl one Saks Fifth Avenue listing page, upsert products, recurse.

        The page is requested with ``Ns=P_sale_flag|1``. Every ``div`` under
        ``div#product-container`` whose id contains "product" and that shows
        both a list price and a sale price is merged into the ``Product``
        document keyed by that id, passed through ``Picker`` and, when picked,
        saved and announced through ``common_saved``. Afterwards the pager
        link whose class is exactly ``next`` is followed recursively.

        Args:
            url: Listing page URL.
            ctx: Sender attached to every ``common_failed`` /
                ``common_saved`` signal.
            **kwargs: ``key`` is read from here to look up the ``Category``;
                everything is forwarded unchanged to the recursive call.

        Raises:
            requests.exceptions.HTTPError: Propagated from
                ``res.raise_for_status()`` on an error status code.
        """
        res = requests.get(url, params={'Ns': 'P_sale_flag|1'})
        res.raise_for_status()
        tree = lxml.html.fromstring(res.content)

        category = Category.objects(key=kwargs.get('key')).first()
        if not category:
            print('Category does not exist')
            common_failed.send(sender=ctx, url=url, reason='Category does not exist -> {0} .'.format(kwargs))
            return

        product_nodes = tree.cssselect('div#product-container div')
        # Number of consecutive products without a discount. A non-discounted
        # product sometimes shows up between discounted ones, so only a run
        # of three stops the crawl.
        no_discount_num = 0

        for product_node in product_nodes:
            key = product_node.get('id')
            if not key or 'product' not in key.lower():
                continue

            info_nodes = product_node.cssselect('div.product-text a')
            if not info_nodes:
                # Report the node instead of aborting the crawl with IndexError.
                common_failed.send(sender=ctx, url=url, reason='listing product %s has no info link' % key)
                continue
            info_node = info_nodes[0]

            price = None
            listprice = None
            listprice_node = info_node.cssselect('span.product-price')
            price_node = info_node.cssselect('span.product-sale-price')
            if listprice_node:
                listprice = ''.join(listprice_node[0].xpath('.//text()')).strip()
            if price_node:
                price = ''.join(price_node[0].xpath('.//text()')).strip()

            if price is None or listprice is None:
                no_discount_num += 1
                if no_discount_num < 3:
                    continue
                return
            no_discount_num = 0

            try:
                brand = info_node.cssselect('p span.product-designer-name')[0].text
                if brand:
                    brand = brand.strip()
                title = info_node.cssselect('p.product-description')[0].text.strip()
            except (IndexError, AttributeError):
                # Node missing (IndexError) or description without direct
                # text (AttributeError on None): report and move on.
                common_failed.send(sender=ctx, url=url, reason='listing product %s cannot crawl brand or title' % key)
                continue
            combine_url = info_node.get('href')

            is_new = False
            is_updated = False
            product = Product.objects(key=key).first()
            if not product:
                is_new = True
                product = Product(key=key)
                product.updated = False
                product.event_type = False

            if title and title != product.title:
                product.title = title
                is_updated = True
                product.update_history['title'] = datetime.utcnow()

            if brand and brand != product.brand:
                product.brand = brand
                is_updated = True

            if combine_url and combine_url != product.combine_url:
                product.combine_url = combine_url
                is_updated = True
                product.update_history['combine_url'] = datetime.utcnow()

            if price and price != product.price:
                product.price = price
                is_updated = True

            if listprice and listprice != product.listprice:
                product.listprice = listprice
                is_updated = True

            # Treat a missing dept as empty in the comparison as well as in
            # the merge, so a None value cannot raise TypeError here.
            known_depts = set(product.dept or [])
            if category.cats and set(category.cats).difference(known_depts):
                product.dept = list(set(category.cats) | known_depts)
                is_updated = True

            if category.key not in product.category_key:
                product.category_key.append(category.key)
                is_updated = True

            if is_updated:
                product.list_update_time = datetime.utcnow()

            # To pick the product which fit our needs, such as a certain discount, brand, dept etc.
            selected = Picker(site='saksfifthavenue').pick(product)
            if not selected:
                continue

            product.hit_time = datetime.utcnow()
            product.save()

            common_saved.send(sender=ctx, obj_type='Product', key=product.key, url=product.combine_url,
                is_new=is_new, is_updated=((not is_new) and is_updated))

        # Go to the next page to keep on crawling.
        next_page = None
        for page_node in tree.cssselect('div.pagination-container ol.pa-page-number li a'):
            if page_node.get('class') == 'next':
                href = page_node.get('href')
                # A "next" link without href ends the pagination instead of
                # raising TypeError inside re.search.
                if href:
                    next_page = href if re.search(r'https?://.+', href) else '{0}/{1}'.format(HOST, href)
                break

        if next_page:
            print(next_page)
            self.crawl_listing(url=next_page, ctx=ctx, **kwargs)