Example #1
0
    def handle(self, *args, **options):
        """
        Scrapes and stores product information

        Fetches ``TOP_URL + "/beers/search"``, follows every ``a.brand-link``
        on that page and creates or updates one ``Product`` per ``tr`` found
        in the beer page's ``table.brand-pricing`` tables.

        ``args`` and ``options`` are accepted but not read.

        Only two failures are handled: a product that is not stored yet
        (``Product.DoesNotExist``) and a price cell that is not a plain
        number (``ValueError``). Anything else -- network errors, missing
        markup, malformed ids -- propagates to the caller.
        """
        # detail labels whose text is copied verbatim onto a model field
        text_fields = {
            'Category': 'category',
            'Style': 'style',
            'Attributes': 'attributes',
            'Brewer': 'brewer',
        }

        # get beer page html and make soup object
        html = urllib2.urlopen(TOP_URL + "/beers/search")
        soup_beers = BeautifulSoup(html)

        # find all beers
        for beer in soup_beers.find_all("a", "brand-link"):
            # get beer page and make soup object
            beer_html = urllib2.urlopen(TOP_URL + beer["href"])
            soup_beer = BeautifulSoup(beer_html)

            # get sizes
            beer_products = soup_beer.find_all("table", "brand-pricing")

            # get properties and values and merge them into dict
            labels = soup_beer.dl.find_all("dt")
            details = soup_beer.dl.find_all("dd")
            beer_details = dict(zip(labels, details))

            # Extract the label/value text once per beer instead of once per
            # size row. The label's last character is dropped (presumably a
            # trailing ':' -- confirm against the live markup).
            detail_texts = [(key.get_text()[:-1], value.get_text())
                            for key, value in beer_details.iteritems()]

            # get name and image
            beer_name = soup_beer.find("div", "only-desktop").find(
                "h1", "page-title").get_text().strip()
            beer_image = soup_beer.find(
                "div", "brand-image").img["src"].strip()

            # get country and type: the last <span> of the introduction is
            # the country, the one before it is the type
            beer_attributes = soup_beer.find(
                "p", "introduction").find_all("span")
            beer_country = beer_attributes[-1].get_text().strip()
            beer_type = beer_attributes[-2].get_text().strip()

            # loop through beer products -> container tables -> sizes
            for beer_product in beer_products:
                for beer_container in beer_product.find_all("tbody"):
                    for beer_size in beer_container.find_all("tr"):

                        # the text after '=' in the row link is split on '-'
                        # into the beer id and the product id
                        beer_ids = beer_size.a["href"].split('=')[1].split('-')
                        beer_id = beer_ids[0]
                        # progress output (presumably the "monitoring" the
                        # original comment referred to)
                        print(beer_id)
                        product_id = int(beer_ids[1].strip())

                        beer_product_size = beer_size.find(
                            "td", "size").get_text()
                        beer_product_price = beer_size.find(
                            "td", "price").get_text()

                        # check if product exists
                        # NOTE: used this custom solution because django
                        # get_or_create doesn't play nice with custom primary
                        # keys. Only "not found" means "create"; any other
                        # lookup error is a real failure and must surface.
                        try:
                            product_entry = Product.objects.get(
                                product_id=product_id)
                        except Product.DoesNotExist:
                            product_entry = Product()

                        # set fields
                        product_entry.name = beer_name
                        product_entry.size = beer_product_size.strip()
                        product_entry.beer_id = int(beer_id.strip())
                        product_entry.product_id = product_id
                        product_entry.image_url = beer_image
                        product_entry.country = beer_country
                        product_entry.type = beer_type

                        # set product attributes
                        # NOTE: this code was created before the beer store
                        # redesign; it still works but some items no longer
                        # exist so they were temporarily omitted from the
                        # serializer
                        for attr, val in detail_texts:
                            if attr in text_fields:
                                setattr(product_entry, text_fields[attr], val)
                            elif attr == 'Alcohol Content (ABV)':
                                # last character dropped before parsing
                                # (presumably a '%' sign)
                                product_entry.abv = float(val[:-1])

                        # update pricing info: the first character of the
                        # cell is skipped (presumably a currency symbol)
                        try:
                            product_entry.price = float(
                                beer_product_price.strip()[1:])
                            product_entry.on_sale = False
                        except ValueError:
                            # not a plain number: use the amount that follows
                            # the word 'sale' and flag the product as on sale
                            product_entry.price = float(
                                beer_product_price.split('sale')[1].strip()
                                [1:])
                            product_entry.on_sale = True

                        product_entry.save()
Example #2
0
    def handle(self, *args, **options):
        """
        Scrapes and stores product information

        Fetches ``TOP_URL + "/beers/search"``, follows every ``a.brand-link``
        on that page and creates or updates one ``Product`` per ``tr`` found
        in the beer page's ``table.brand-pricing`` tables.

        ``args`` and ``options`` are accepted but not read.

        Only two failures are handled: a product that is not stored yet
        (``Product.DoesNotExist``) and a price cell that is not a plain
        number (``ValueError``). Anything else -- network errors, missing
        markup, malformed ids -- propagates to the caller.
        """
        # detail labels whose text is copied verbatim onto a model field
        text_fields = {
            'Category': 'category',
            'Style': 'style',
            'Attributes': 'attributes',
            'Brewer': 'brewer',
        }

        # get beer page html and make soup object
        html = urllib2.urlopen(TOP_URL + "/beers/search")
        soup_beers = BeautifulSoup(html)

        # find all beers
        for beer in soup_beers.find_all("a", "brand-link"):
            # get beer page and make soup object
            beer_html = urllib2.urlopen(TOP_URL + beer["href"])
            soup_beer = BeautifulSoup(beer_html)

            # get sizes
            beer_products = soup_beer.find_all("table", "brand-pricing")

            # get properties and values and merge them into dict
            labels = soup_beer.dl.find_all("dt")
            details = soup_beer.dl.find_all("dd")
            beer_details = dict(zip(labels, details))

            # Extract the label/value text once per beer instead of once per
            # size row. The label's last character is dropped (presumably a
            # trailing ':' -- confirm against the live markup).
            detail_texts = [(key.get_text()[:-1], value.get_text())
                            for key, value in beer_details.iteritems()]

            # get name and image
            beer_name = soup_beer.find("div", "only-desktop").find(
                "h1", "page-title").get_text().strip()
            beer_image = soup_beer.find(
                "div", "brand-image").img["src"].strip()

            # get country and type: the last <span> of the introduction is
            # the country, the one before it is the type
            beer_attributes = soup_beer.find(
                "p", "introduction").find_all("span")
            beer_country = beer_attributes[-1].get_text().strip()
            beer_type = beer_attributes[-2].get_text().strip()

            # loop through beer products -> container tables -> sizes
            for beer_product in beer_products:
                for beer_container in beer_product.find_all("tbody"):
                    for beer_size in beer_container.find_all("tr"):

                        # the text after '=' in the row link is split on '-'
                        # into the beer id and the product id
                        beer_ids = beer_size.a["href"].split('=')[1].split('-')
                        beer_id = beer_ids[0]
                        # progress output (presumably the "monitoring" the
                        # original comment referred to)
                        print(beer_id)
                        product_id = int(beer_ids[1].strip())

                        beer_product_size = beer_size.find(
                            "td", "size").get_text()
                        beer_product_price = beer_size.find(
                            "td", "price").get_text()

                        # check if product exists
                        # NOTE: used this custom solution because django
                        # get_or_create doesn't play nice with custom primary
                        # keys. Only "not found" means "create"; any other
                        # lookup error is a real failure and must surface.
                        try:
                            product_entry = Product.objects.get(
                                product_id=product_id)
                        except Product.DoesNotExist:
                            product_entry = Product()

                        # set fields
                        product_entry.name = beer_name
                        product_entry.size = beer_product_size.strip()
                        product_entry.beer_id = int(beer_id.strip())
                        product_entry.product_id = product_id
                        product_entry.image_url = beer_image
                        product_entry.country = beer_country
                        product_entry.type = beer_type

                        # set product attributes
                        # NOTE: this code was created before the beer store
                        # redesign; it still works but some items no longer
                        # exist so they were temporarily omitted from the
                        # serializer
                        for attr, val in detail_texts:
                            if attr in text_fields:
                                setattr(product_entry, text_fields[attr], val)
                            elif attr == 'Alcohol Content (ABV)':
                                # last character dropped before parsing
                                # (presumably a '%' sign)
                                product_entry.abv = float(val[:-1])

                        # update pricing info: the first character of the
                        # cell is skipped (presumably a currency symbol)
                        try:
                            product_entry.price = float(
                                beer_product_price.strip()[1:])
                            product_entry.on_sale = False
                        except ValueError:
                            # not a plain number: use the amount that follows
                            # the word 'sale' and flag the product as on sale
                            product_entry.price = float(
                                beer_product_price.split('sale')[1].strip()
                                [1:])
                            product_entry.on_sale = True

                        product_entry.save()
Example #3
0
    # Skip this entry when the first item or its 'sku' key cannot be read.
    # NOTE(review): the enclosing loop is outside this excerpt, and the bare
    # except also hides any other error raised by the lookup.
    try:
        sku = _products[0]['sku']
    except:
        continue

    # NOTE(review): `name` is not read again in the visible lines.
    name = _products[0]['name']

    # Reuse the Product stored under this sku, or start a new one. The bare
    # except treats every failure of the lookup as "not found".
    try:
        product = Product.objects.get(sku=sku)
    except:
        product = Product()
        product.sku = sku

    # Copy the first item's fields onto the product.
    product.title = _products[0]['name']
    product.price = float(_products[0]['price'])
    # NOTE(review): the image URL is stored in `attributes` -- confirm this
    # is intended.
    product.attributes = _products[0]['image_url']

    # Fetch the category whose title is the item's 'set', creating it when
    # missing; the second element of the returned pair (_a) is not used in
    # the visible lines.
    category, _a = Category.objects.get_or_create(title=_products[0]['set'])

    product.category = category

    product.save()

    # NOTE(review): `color` is initialised but not filled in the visible
    # lines; the excerpt appears to be cut off.
    color = []
    size = []

    # Collect the 'size' of every item; items without one are ignored.
    for _product in _products:
        try:
            size.append(_product['size'])
        except:
            pass
Example #4
0
def _sync_product(oldproduct):
    """Refresh one product from the choies item API.

    Requests ``http://www.choies.com/api/item?sku=<oldproduct.sku>`` and, when
    the response is a non-empty JSON list whose first item has a ``'sku'``:

    * creates or updates the ``Product`` with that sku (title, price,
      attributes, category) from the first item;
    * stores the comma-joined ``'color'`` and ``'size'`` values of all items
      as ``ProductAttribute`` rows named ``'color'`` / ``'size'``;
    * sets ``oldproduct.status`` to ``False`` and saves it.

    Returns early, without touching ``Product``, when the request fails
    (``oldproduct`` is left unchanged), when the body is not JSON
    (``oldproduct.status`` is set to ``False`` and saved), or when no sku can
    be read from the payload (``oldproduct`` is left unchanged).

    Errors that are not one of those expected cases -- e.g. a first item
    without ``'name'`` or ``'price'``, or a database error -- propagate.
    """
    print(oldproduct)
    url = "http://www.choies.com/api/item?sku=%s" % oldproduct.sku

    try:
        r = requests.get(url=url, timeout=2)
    except requests.RequestException:
        # network problem or timeout: leave the record as it is
        return

    print(url)

    try:
        _products = r.json()
    except ValueError:
        # body is not JSON
        oldproduct.status = False
        oldproduct.save()
        return

    try:
        first = _products[0]
        sku = first['sku']
    except (IndexError, KeyError, TypeError):
        # empty list, JSON object instead of list, or item without a sku
        return

    # Only "not found" means "create"; other lookup errors must surface.
    try:
        product = Product.objects.get(sku=sku)
    except Product.DoesNotExist:
        product = Product()
        product.sku = sku

    product.title = first['name']
    product.price = float(first['price'])
    # NOTE(review): the image URL is stored in `attributes` -- confirm this
    # is intended.
    product.attributes = first['image_url']

    category, _created = Category.objects.get_or_create(name=first['set'])
    product.category = category
    product.save()

    # gather the option values offered across all returned items
    options = {'color': [], 'size': []}
    for _product in _products:
        for key in ('size', 'color'):
            try:
                options[key].append(_product[key])
            except (KeyError, TypeError):
                # item has no such key (or is not a mapping): skip it
                pass

    for key in ('color', 'size'):
        if options[key]:
            productattribute, _created = (
                ProductAttribute.objects.get_or_create(
                    name=key, product=product))
            productattribute.options = ','.join(options[key])
            productattribute.save()

    oldproduct.status = False
    oldproduct.save()
    # prints the last item of the payload
    print(_product)


def get_product(q):
    """Worker loop: refresh every product record taken from ``q``.

    ``q`` must provide ``get()`` and ``task_done()`` (e.g. a ``Queue``);
    each item must have ``sku``, ``status`` and ``save()``.

    The loop never ends on its own. ``q.task_done()`` is called exactly once
    per item taken, even when processing raises, so the queue's unfinished
    count stays accurate. An item that fails in an expected way (request
    error, non-JSON body, payload without a sku) is skipped and the worker
    moves on to the next item instead of terminating, so one bad item no
    longer leaves the rest of the queue unprocessed.
    """
    while True:
        oldproduct = q.get()
        try:
            _sync_product(oldproduct)
        finally:
            q.task_done()