Example #1
0
 def task_category(self, grab, task):
     """Handle a category page: resolve its Category and yield follow-ups.

     The ``Category`` whose ``urls`` contains ``task.url`` is reused when
     one exists; otherwise a new one is created (named after the stripped
     text of ``task.elem``) and saved.

     If the element following ``task.elem`` has child elements, a
     ``'category'`` task is yielded for each ``<a>`` beneath it that is not
     itself followed by a populated element. Otherwise a ``'product'`` task
     is yielded for each node matched by ``self.xpath['product']``.

     Args:
         grab: fetched page; only ``grab.doc.select`` is used.
         task: current task; ``task.url``, ``task.elem`` and
             ``task.get("category")`` are read.

     Yields:
         ``Task`` objects of kind ``'category'`` or ``'product'``, each
         carrying the resolved category as ``category``.
     """

     def has_children(node):
         # Spells out the test the previous implicit truthiness check
         # performed (assumes lxml elements, where bool(element) means
         # "has child elements" and emits a FutureWarning -- TODO confirm
         # the element type against the caller).
         return node is not None and len(node) > 0

     # Single-argument parenthesised form: prints the same text on
     # Python 2 and is also valid syntax on Python 3.
     print("Category url: %s" % task.url)

     cats = Category.objects.filter(urls__contains=task.url)
     if cats:
         cat = cats[0]
     else:
         cat = Category(name=task.elem.text.strip(), root_category=task.get("category"))
         cat.urls = task.url
         cat.save()

     # Look the sibling up once instead of once per use.
     sibling = task.elem.getnext()
     if has_children(sibling):
         for elem in sibling.xpath(".//a"):
             # Anchors followed by a populated element are skipped here.
             if has_children(elem.getnext()):
                 continue
             href = elem.get("href")
             if href is None:
                 # An <a> without href has no page to fetch; skipping it
                 # avoids a TypeError that would abort the whole generator.
                 continue
             yield Task('category', url=href + "?ALL=1", elem=elem, category=cat)
     else:
         for elem in grab.doc.select(self.xpath['product']):
             yield Task('product', url=elem.attr("href"), category=cat)