Exemple #1
0
def _item_row_fields(row):
    """Pull the raw item fields out of one ``<tr>`` of a price table.

    Returns a ``(name, unit, raw_price)`` tuple, where ``raw_price`` is the
    unconverted ``value`` attribute of the second ``<input>`` in the second
    cell. Returns ``None`` when the row has fewer than three ``<td>`` cells
    or fewer than two inputs in the second cell (e.g. a header row), so the
    caller can skip it instead of crashing half-way through an import.

    Raises:
        KeyError: if the second input has no ``value`` attribute.
    """
    cells = row.findAll('td')
    if len(cells) < 3:
        return None
    inputs = cells[1].findAll('input')
    if len(inputs) < 2:
        return None
    return cells[0].string, cells[2].string, inputs[1]['value']


def price_parse(request):
    """Scrape the remote price list and store it, then redirect to /price/.

    Downloads ``BASE_URL``, and for every ``<h2 class="button">`` heading
    creates and saves a ``Category``. The first ``<div>`` sibling after the
    heading is scanned for ``<h3>`` headings; each one becomes a
    ``SubCategory``, and every usable row of the first ``<table>`` under the
    heading's parent becomes an ``Item`` (name, unit, integer price).

    ``request`` is not read. Every call creates new records; nothing here
    checks for rows saved by a previous call.
    NOTE(review): ``Category``/``SubCategory``/``Item`` are presumably Django
    models defined elsewhere -- confirm whether the import should be wrapped
    in a transaction so a mid-run failure does not leave partial data.

    Returns:
        ``HttpResponseRedirect('/price/')``.

    Raises:
        ValueError: if a price ``value`` attribute is not an integer literal.
    """
    BASE_URL = 'http://classicag.ru/price/'

    import urllib2
    from contextlib import closing

    from bs4 import BeautifulSoup

    # urllib2 responses are not context managers on Python 2, so closing()
    # guarantees the socket is released even if read() or parsing fails.
    with closing(urllib2.urlopen(BASE_URL)) as response:
        soup = BeautifulSoup(response.read())

    for category in soup.findAll('h2', attrs={'class': 'button'}):
        category_name = category.contents[0].string
        print('___________________________________________________________')
        print(category_name)
        cat = Category(name=category_name)
        cat.save()

        # The category's content lives in the first <div> that follows the
        # heading; a heading without one simply has nothing to import.
        category_container = category.findNextSibling('div')
        if category_container is None:
            continue

        for subcat in category_container.findAll('h3'):
            subcat_name = subcat.string
            print('   ' + subcat_name)
            subc = SubCategory(category=cat, name=subcat_name)
            subc.save()

            items_table = subcat.parent.find('table')
            if items_table is None:
                continue

            for tr in items_table.findAll('tr'):
                fields = _item_row_fields(tr)
                if fields is None:
                    # Not an item row (e.g. header row without <td> cells).
                    continue
                name, unit, raw_price = fields
                # raw_price is concatenated as-is: forcing it through str()
                # could raise UnicodeEncodeError on Python 2.
                print('      ' + name + ' | ' + raw_price)
                i = Item(subcategory=subc,
                         name=name,
                         unit=unit,
                         price=int(raw_price))
                i.save()
    return HttpResponseRedirect('/price/')