def filter_products(links, product_page=None, products=None, running_total=None):
    """Collect the products listed on one page and record their prices.

    A product is recognised as the link that immediately precedes an
    "Add to trolley" link; its price is taken from the text between the
    first "$" and the following "'" in the trolley link's URL.  For every
    product found, a ``(name, "$price", on_sale)`` tuple is appended to
    ``products`` and, when the price parses as a number, a ``Price`` row
    is stored for today's date unless one already exists.

    Arguments:
    links         -- iterable of link objects exposing ``.text`` and ``.url``
    product_page  -- page markup, parsed with BeautifulSoup to find specials
    products      -- list extended in place; a new list is used when None
    running_total -- optional object; ``increment(name)`` is called once
                     per product when it is truthy

    Nothing is returned, so pass ``products`` explicitly to see the results.

    NOTE(review): the original comments describe this function as recursive
    over result pages (with a ``has_next`` base case), but no recursive call
    is present in this block -- confirm whether that logic lives elsewhere.
    """
    if products is None:
        products = []

    specials = _find_specials(product_page)

    # Products are identified by the fact that they precede a link called
    # "Add to trolley", so remember the link seen on the previous iteration.
    previous_link = None

    for link in links:
        # A trolley link with nothing before it has no product name to use.
        if link.text == "Add to trolley" and previous_link is not None:
            product_name = previous_link.text
            sale = product_name in specials
            raw_price = link.url.split("$")[1].split("'")[0]
            product_price = "$" + raw_price
            products.append((product_name, product_price, sale))

            try:
                decimal_price = float(raw_price)

                # NOTE(review): month and day are not zero-padded here, while
                # str() of a date object is; confirm the type of Price.date,
                # otherwise the clash check below may miss existing rows.
                date = str(now.year) + "-" + str(now.month) + "-" + str(now.day)
                _record_price(product_name, decimal_price, sale, date)
            except ValueError:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("Unable to parse price, not a valid number:" + product_price)

            if running_total:
                running_total.increment(product_name)

        previous_link = link


def _find_specials(product_page):
    """Return the set of image titles found in cells marked as specials."""
    # Make an object the parser can handle
    soup = BeautifulSoup(product_page)

    specials = set()
    # Only table cells that reference the specials badge are of interest.
    for cell in soup.findAll('td'):
        if "specials.gif" not in str(cell):
            continue
        for img in BeautifulSoup(str(cell)).findAll('img'):
            tag = str(img)
            # Skip the badge image itself and the "more information" icon.
            if ("title=\"" in tag
                    and "Coles Special" not in tag
                    and "More product information available" not in tag):
                specials.add(tag.split("title=\"")[1].split("\"")[0])
    return specials


def _record_price(product_name, decimal_price, sale, date):
    """Store a price for ``date`` against the named product, creating it if new."""
    # One query instead of separate existence, count and fetch queries.
    matches = list(Product.objects.filter(name=product_name))
    if not matches:
        this_product = Product(name=product_name)
        this_product.save()
        print("New Product: " + product_name)
    else:
        # Product names are expected to be unique.
        assert len(matches) == 1
        this_product = matches[0]
        print("Existing Product: " + product_name)

    date_clash = False
    for p in this_product.price_history.all():
        if str(p.date) == date:
            print("Pre-existing record for this date")
            date_clash = True

    if not date_clash:
        this_price = Price(price=decimal_price, date=date, sale=sale,
                           product_id=this_product.id)
        this_price.save()
        this_product.price_history.add(this_price)
        this_product.save()
# Example #2
# 0
import sys
sys.path.append("/var/www_coles_dj/coles_scrape")
sys.path.append("/var/www_coles_dj/")
import os
os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
from django.db import models

import datetime
now = datetime.datetime.now()

from coles_data.models import Product, Price

# Look up the demo product by name, creating it on the first run.
new_product = "foo"
# One query instead of separate existence, count and fetch queries.
matches = list(Product.objects.filter(name=new_product))
if not matches:
    x = Product(name=new_product)
    x.save()
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("New Product: " + new_product)
else:
    # Product names are expected to be unique.
    assert len(matches) == 1
    x = matches[0]
    print("Existing Product: " + new_product)

# Attach a price record dated with `now` to the product.
today = str(now.year) + "-" + str(now.month) + "-" + str(now.day)
y = Price(price=0.5, date=today, sale=False, product_id=x.id)
y.save()
x.price_history.add(y)
x.save()

print(Price.objects.all())
from coles_data.models import Product, Price

try:
    f = open(sys.argv[1], 'r')
    date = raw_input("Date for this file: ")
    for line in f:
        try:        
            name, price, sale = line.split("||")
            price = float(price[1:])
            sale = sale.rstrip()
            sale = (sale == "True")

            #If this product does not already exist in the database
            if len(Product.objects.filter(name=name)) == 0:
                this_product = Product(name = name)
                this_product.save()

            else:
                assert len(Product.objects.filter(name=name)) == 1
                this_product = Product.objects.get(name=name)

            date_clash = False
            for p in this_product.price_history.all():
                if str(p.date) == date:
                    print "Pre-existing record for this date"
                    date_clash = True
            if not date_clash:
                this_price = Price(price=price,date=date,sale=sale,product_id=this_product.id)
                this_price.save()
                this_product.price_history.add(this_price)