# Nordstrom listing crawler: fetch the sale-sorted listing page and upsert discounted products.
def crawl_listing(self, url, ctx='', **kwargs):
    if url.startswith('http://blogs.nordstrom.com'):
        return
    try:
        res = requests.get(url, params={'sort': 'sale'})
    except requests.exceptions.ConnectionError:
        return
    res.raise_for_status()
    tree = lxml.html.fromstring(res.content)

    listing_node = tree.cssselect('div.fashion-results')
    if listing_node:
        listing_node = listing_node[0]
    else:
        if tree.cssselect('div#brandsIndex'):
            return self.crawl_listing_of_no_leaf(tree, ctx=ctx, **kwargs)
        return

    product_nodes = listing_node.cssselect('div.row div.fashion-item')
    if not product_nodes:
        self.crawl_listing_of_no_leaf(tree, ctx=ctx, **kwargs)
        return

    category = Category.objects(key=kwargs.get('key')).first()
    no_discount_num = 0  # sometimes a product without a discount appears among the discounted ones when sorted by sale.

    for product_node in product_nodes:
        key = product_node.get('id')
        if not key:
            common_failed.send(sender=ctx, url=url, reason='listing product has no id')
            continue

        try:
            info_node = product_node.cssselect('div.info')[0]
            a_node = info_node.cssselect('a')[0]
            title = a_node.text.strip()

            price = None; listprice = None
            price_nodes = info_node.cssselect('.price')
            for price_node in price_nodes:
                if 'regular' in price_node.get('class'):
                    listprice = price_node.text
                elif 'sale' in price_node.get('class'):
                    price = price_node.text
            if price is None or listprice is None:
                no_discount_num += 1
                if no_discount_num < 3:
                    continue
                # common_failed.send(sender=ctx, url=url, \
                #     reason='listing product %s.%s cannot crawl price info -> %s / %s' % (key, title, price, listprice))
                return

            combine_url = a_node.get('href')
            if not combine_url:
                common_failed.send(sender=ctx, url=url,
                    reason='listing product %s.%s cannot crawl combine_url' % (key, title))
                continue
            match = re.search(r'https?://.+', combine_url)
            if not match:
                combine_url = 'http://shop.nordstrom.com%s' % combine_url
        except IndexError:
            print traceback.format_exc()
            common_failed.send(sender=ctx, url=url,
                reason='listing product %s -> %s' % (key, traceback.format_exc()))
            continue

        is_new = False; is_updated = False
        product = Product.objects(key=key).first()
        if not product:
            is_new = True
            product = Product(key=key)
            product.updated = False
            product.event_type = False
        if combine_url and combine_url != product.combine_url:
            product.combine_url = combine_url
            is_updated = True
        if title and title != product.title:
            product.title = title
            is_updated = True
        if price and price != product.price:
            product.price = price
            is_updated = True
        if listprice and listprice != product.listprice:
            product.listprice = listprice
            is_updated = True
        if category.cats and set(category.cats).difference(product.dept):
            product.dept = list(set(category.cats) | set(product.dept or []))
            is_updated = True
        if category.key not in product.category_key:
            product.category_key.append(category.key)
            is_updated = True
        if is_updated:
            product.list_update_time = datetime.utcnow()

        # Pick only the products that fit our needs, such as a certain discount, brand, dept, etc.
        selected = Picker(site='nordstrom').pick(product)
        if not selected:
            continue

        product.hit_time = datetime.utcnow()
        product.save()

        # print product.title
        # print product.combine_url
        # print product.listprice
        # print product.price
        # print is_new
        # print is_updated
        # print

        common_saved.send(sender=ctx, obj_type='Product', key=product.key, url=product.combine_url,
            is_new=is_new, is_updated=((not is_new) and is_updated))

    # Go to the next page to keep on crawling.
    try:
        arrow_node = tree.cssselect('div.fashion-results-header div.fashion-results-pager ul.arrows li.next')[0]
    except IndexError:
        common_failed.send(sender=ctx, url=url, reason=traceback.format_exc())
        return
    next_page = arrow_node.cssselect('a')[0].get('href') \
        if 'disabled' not in arrow_node.get('class') else None
    if next_page:
        print next_page
        self.crawl_listing(url=next_page, ctx=ctx, **kwargs)
# 6pm listing crawler: upsert discounted products from the search-results listing.
def crawl_listing(self, url, ctx='', **kwargs):
    res = requests.get(url)
    res.raise_for_status()
    tree = lxml.html.fromstring(res.content)

    category = Category.objects(key=kwargs.get('key')).first()
    if not category:
        common_failed.send(sender=ctx, url=url, reason='category %s not found in db' % kwargs.get('key'))
        return

    product_nodes = tree.cssselect('div#searchResults a')
    for product_node in product_nodes:
        price = None; listprice = None
        try:
            price = product_node.cssselect('.price-6pm')[0].text
            listprice_node = product_node.cssselect('.discount')
            listprice = ''.join(listprice_node[0].xpath('text()')) if listprice_node else None
        except IndexError:
            pass
        # eliminate products with no discount
        if price is None or listprice is None:
            # common_failed.send(sender=ctx, url=url, \
            #     reason='listing product %s.%s cannot crawl price info -> %s / %s' % (key, title, price, listprice))
            continue

        key = product_node.get('data-product-id')
        if not key:
            common_failed.send(sender=ctx, url=url, reason='listing product has no key')
            continue

        combine_url = product_node.get('href')
        key = '%s_%s' % (key, combine_url.split('/')[-1])
        match = re.search(r'https?://.+', combine_url)
        if not match:
            combine_url = '%s%s' % (HOST, combine_url)

        brand = product_node.cssselect('.brandName')[0].text.strip()
        title = product_node.cssselect('.productName')[0].text.strip()

        is_new = False; is_updated = False
        product = Product.objects(key=key).first()
        if not product:
            is_new = True
            product = Product(key=key)
            product.updated = False
            product.event_type = False
        if title and title != product.title:
            product.title = title
            is_updated = True
        if brand and brand != product.brand:
            product.brand = brand
            is_updated = True
        if combine_url and combine_url != product.combine_url:
            product.combine_url = combine_url
            is_updated = True
        if price and price != product.price:
            product.price = price
            is_updated = True
        if listprice and listprice != product.listprice:
            product.listprice = listprice
            is_updated = True
        if category.cats and set(category.cats).difference(product.dept):
            product.dept = list(set(category.cats) | set(product.dept or []))
            is_updated = True
        if category.key not in product.category_key:
            product.category_key.append(category.key)
            is_updated = True
        if is_updated:
            product.list_update_time = datetime.utcnow()

        # Pick only the products that fit our needs, such as a certain discount, brand, dept, etc.
        selected = Picker(site='6pm').pick(product)
        if not selected:
            continue

        product.hit_time = datetime.utcnow()
        product.save()
        common_saved.send(sender=ctx, obj_type='Product', key=product.key, url=product.combine_url,
            is_new=is_new, is_updated=((not is_new) and is_updated))

        print product.key
        print product.brand
        print product.title
        print product.price, ' / ', product.listprice
        print product.combine_url
        print product.dept
        print

    # Go to the next page to keep on crawling.
    next_page = None
    page_node = tree.cssselect('div.pagination')
    if not page_node:
        return
    last_node = page_node[0].cssselect('.last')
    if last_node:
        next_page = page_node[0].cssselect('a')[-1].get('href')
    if next_page:
        match = re.search(r'https?://.+', next_page)
        if not match:
            next_page = '%s%s' % (HOST, next_page)
        print next_page
        self.crawl_listing(url=next_page, ctx=ctx, **kwargs)
# Saks Fifth Avenue listing crawler: fetch the sale-filtered listing (Ns=P_sale_flag|1) and upsert discounted products.
def crawl_listing(self, url, ctx='', **kwargs):
    res = requests.get(url, params={'Ns': 'P_sale_flag|1'})
    res.raise_for_status()
    tree = lxml.html.fromstring(res.content)

    category = Category.objects(key=kwargs.get('key')).first()
    if not category:
        print 'Category does not exist'
        common_failed.send(sender=ctx, url=url, reason='Category does not exist -> {0}.'.format(kwargs))
        return

    product_nodes = tree.cssselect('div#product-container div')
    no_discount_num = 0  # sometimes a product without a discount appears among the discounted ones when sorted by sale.

    for product_node in product_nodes:
        if not product_node.get('id') or 'product' not in product_node.get('id').lower():
            continue
        key = product_node.get('id')
        info_node = product_node.cssselect('div.product-text a')[0]

        price = None; listprice = None
        listprice_node = info_node.cssselect('span.product-price')
        price_node = info_node.cssselect('span.product-sale-price')
        if listprice_node:
            listprice = ''.join(listprice_node[0].xpath('.//text()')).strip()
        if price_node:
            price = ''.join(price_node[0].xpath('.//text()')).strip()
        if price is None or listprice is None:
            no_discount_num += 1
            if no_discount_num < 3:
                continue
            return
        no_discount_num = 0

        brand = info_node.cssselect('p span.product-designer-name')[0].text
        if brand:
            brand = brand.strip()
        title = info_node.cssselect('p.product-description')[0].text.strip()
        combine_url = info_node.get('href')

        is_new = False; is_updated = False
        product = Product.objects(key=key).first()
        if not product:
            is_new = True
            product = Product(key=key)
            product.updated = False
            product.event_type = False
        if title and title != product.title:
            product.title = title
            is_updated = True
            product.update_history['title'] = datetime.utcnow()
        if brand and brand != product.brand:
            product.brand = brand
            is_updated = True
        if combine_url and combine_url != product.combine_url:
            product.combine_url = combine_url
            is_updated = True
            product.update_history['combine_url'] = datetime.utcnow()
        if price and price != product.price:
            product.price = price
            is_updated = True
        if listprice and listprice != product.listprice:
            product.listprice = listprice
            is_updated = True
        if category.cats and set(category.cats).difference(product.dept):
            product.dept = list(set(category.cats) | set(product.dept or []))
            is_updated = True
        if category.key not in product.category_key:
            product.category_key.append(category.key)
            is_updated = True
        if is_updated:
            product.list_update_time = datetime.utcnow()

        # Pick only the products that fit our needs, such as a certain discount, brand, dept, etc.
        selected = Picker(site='saksfifthavenue').pick(product)
        if not selected:
            continue

        product.hit_time = datetime.utcnow()
        product.save()

        # print product.brand; print product.title; print product.combine_url; print product.listprice, ' / ', product.price; print is_new; print is_updated
        # print

        common_saved.send(sender=ctx, obj_type='Product', key=product.key, url=product.combine_url,
            is_new=is_new, is_updated=((not is_new) and is_updated))

    # Go to the next page to keep on crawling.
    next_page = None
    page_nodes = tree.cssselect('div.pagination-container ol.pa-page-number li a')
    for page_node in page_nodes:
        if page_node.get('class') == 'next':
            href = page_node.get('href')
            match = re.search(r'https?://.+', href)
            next_page = href if match else '{0}/{1}'.format(HOST, href)
            break
    if next_page:
        print next_page
        self.crawl_listing(url=next_page, ctx=ctx, **kwargs)