def task_category(self, grab, task):
    """Handle a category page and yield the follow-up crawl tasks.

    Looks up a ``Category`` whose ``urls`` contains ``task.url``; when none
    matches, a new one is created (named after the stripped text of
    ``task.elem``, with ``root_category`` taken from the task) and saved.

    Args:
        grab: Object whose ``doc.select(...)`` is queried for product links.
        task: The task being handled. ``task.url``, ``task.elem`` and
            ``task.get("category")`` are read.

    Yields:
        ``Task('category', ...)`` for every ``<a>`` found below the element
        that follows ``task.elem``, skipping anchors that are themselves
        followed by a non-empty element; each URL is the anchor's ``href``
        with ``?ALL=1`` appended. When ``task.elem`` has no non-empty
        following element, ``Task('product', ...)`` is yielded instead for
        every match of ``self.xpath['product']``.
    """
    # Single-argument parenthesised form: same output under the Python 2
    # print statement, and valid syntax on Python 3 as well.
    print("Category url: %s" % task.url)

    matches = Category.objects.filter(urls__contains=task.url)
    if matches:
        cat = matches[0]
    else:
        cat = Category(name=task.elem.text.strip(),
                       root_category=task.get("category"))
        cat.urls = task.url
        cat.save()

    # NOTE(review): the nodes are assumed to be lxml elements (getnext/xpath
    # API). lxml's truth value for an element means "has child elements" and
    # raises a FutureWarning, so the test is spelled out explicitly here with
    # the same outcome. If "a following sibling exists" was the real intent,
    # drop the len() part -- confirm against the target markup first.
    submenu = task.elem.getnext()
    if submenu is not None and len(submenu):
        for link in submenu.xpath(".//a"):
            nested = link.getnext()
            if nested is not None and len(nested):
                # Anchor is followed by a populated element: skipped, exactly
                # as the original truth test did.
                continue
            yield Task('category', url=link.get("href") + "?ALL=1",
                       elem=link, category=cat)
    else:
        for product in grab.doc.select(self.xpath['product']):
            yield Task('product', url=product.attr("href"), category=cat)