def parseMain(self, response):
    """Yield one follow-up request per restaurant link found in ``response``.

    A ``RestaurantIDsGetter`` built from ``response`` is stored on
    ``self.restaurantIDsGetter`` before links are extracted. Because this is
    a generator, that assignment happens when iteration starts, not when the
    method is called.

    :param response: page whose links are scanned.
    :returns: generator of ``Request`` objects, one per extracted link, each
        using ``self.parseRestaurants`` as its callback.
    """
    self.restaurantIDsGetter = RestaurantIDsGetter(response)

    # Only URLs of the form .../domicilios-<anything> are followed.
    restaurantPagePattern = (
        r"http\:\/\/www\.domiciliosbogota\.com\/domicilios\-.*")
    extractor = LinkExtractor(allow=restaurantPagePattern)

    for found in extractor.extract_links(response):
        yield Request(found.url, callback=self.parseRestaurants)
class RestaurantSpider(CrawlSpider):
    """Spider that starts at the domiciliosbogota.com home page and produces
    one ``Restaurant`` per restaurant page it requests."""

    name = "RestaurantSpider"
    allowed_domains = ["domiciliosbogota.com"]
    start_urls = ('http://www.domiciliosbogota.com/',)
    productLinkGetter = ProductLinkGetter()

    # Links pointing at the site root are handed to parseMain.
    rules = [
        Rule(
            LinkExtractor(allow=r"http://www\.domiciliosbogota\.com/$"),
            'parseMain',
        ),
    ]

    def parseMain(self, response):
        """Yield one request per restaurant link found in ``response``.

        Also stores a ``RestaurantIDsGetter`` built from ``response`` on
        ``self.restaurantIDsGetter``; ``parseRestaurants`` reads it later.

        :param response: page whose links are scanned.
        :returns: generator of ``Request`` objects whose callback is
            ``self.parseRestaurants``.
        """
        self.restaurantIDsGetter = RestaurantIDsGetter(response)

        restaurantPagePattern = (
            r"http\:\/\/www\.domiciliosbogota\.com\/domicilios\-.*")
        extractor = LinkExtractor(allow=restaurantPagePattern)

        for found in extractor.extract_links(response):
            yield Request(found.url, callback=self.parseRestaurants)

    def parseRestaurants(self, response):
        """Build the ``Restaurant`` record for a single restaurant page.

        Every field except ``url`` and ``id`` comes from a
        ``RestaurantSelector`` wrapping ``response``. ``id`` is looked up via
        ``self.restaurantIDsGetter`` (set in ``parseMain``) using ``"/"``
        followed by the last ``/``-separated segment of the page URL.

        :param response: the restaurant page.
        :returns: the populated ``Restaurant``.
        """
        page = RestaurantSelector(response)
        item = Restaurant()

        item["url"] = response.url
        item["name"] = page.getName()
        item["id"] = self.restaurantIDsGetter.getID(
            "/" + response.url.rsplit("/", 1)[-1])

        # Delivery and payment details.
        item["deliveryTimeInMinutes"] = page.getDeliveryTimeInMinutes()
        item["minOrderPrice"] = page.getMinOrderPrice()
        item["deliveryCost"] = page.getDeliveryCost()
        item["payMethods"] = page.getPayMethods()

        # Menu, tags and rating data.
        item["menu"] = page.getMenuCategories()
        item["tagCategories"] = page.getTagCategories()
        item["averagePunctuation"] = page.getAveragePunctuation()
        item["quantityOfComments"] = page.getQuantityOfComments()

        return item
class RestaurantSpider(CrawlSpider):
    """Crawls domiciliosbogota.com from its home page and returns a
    ``Restaurant`` for each restaurant page requested by ``parseMain``."""

    name = "RestaurantSpider"
    allowed_domains = ["domiciliosbogota.com"]
    start_urls = ('http://www.domiciliosbogota.com/',)
    productLinkGetter = ProductLinkGetter()

    # Single rule: links matching the bare site root go to parseMain.
    rules = [
        Rule(LinkExtractor(allow=r"http://www\.domiciliosbogota\.com/$"),
             'parseMain'),
    ]

    def parseMain(self, response):
        """Request every restaurant page linked from ``response``.

        Stores ``RestaurantIDsGetter(response)`` on
        ``self.restaurantIDsGetter`` first, then yields a ``Request`` for
        each extracted link, with ``self.parseRestaurants`` as callback.

        :param response: page whose links are scanned.
        :returns: generator of ``Request`` objects.
        """
        self.restaurantIDsGetter = RestaurantIDsGetter(response)

        restaurantLinks = LinkExtractor(
            allow=r"http\:\/\/www\.domiciliosbogota\.com\/domicilios\-.*"
        ).extract_links(response)

        for restaurantLink in restaurantLinks:
            yield Request(restaurantLink.url, callback=self.parseRestaurants)

    def parseRestaurants(self, response):
        """Populate and return a ``Restaurant`` from one restaurant page.

        ``url`` is taken from ``response``; ``id`` is resolved through
        ``self.restaurantIDsGetter`` using ``"/"`` plus the last path segment
        of the URL; all remaining fields are read from a
        ``RestaurantSelector`` built on ``response``.

        :param response: the restaurant page.
        :returns: the populated ``Restaurant``.
        """
        selector = RestaurantSelector(response)
        restaurant = Restaurant()

        restaurant["url"] = response.url
        restaurant["name"] = selector.getName()

        # The ID lookup key is "/<last segment of the URL>".
        lastSegment = response.url.split("/")[-1]
        restaurant["id"] = self.restaurantIDsGetter.getID("/" + lastSegment)

        restaurant["deliveryTimeInMinutes"] = (
            selector.getDeliveryTimeInMinutes())
        restaurant["minOrderPrice"] = selector.getMinOrderPrice()
        restaurant["deliveryCost"] = selector.getDeliveryCost()
        restaurant["payMethods"] = selector.getPayMethods()
        restaurant["menu"] = selector.getMenuCategories()
        restaurant["tagCategories"] = selector.getTagCategories()
        restaurant["averagePunctuation"] = selector.getAveragePunctuation()
        restaurant["quantityOfComments"] = selector.getQuantityOfComments()

        return restaurant
class RestautantIDsGetterTest(unittest.TestCase):
    """Tests for ``RestaurantIDsGetter.getID`` using the ``examples/main``
    fixture response."""

    def setUp(self):
        # Every test gets a fresh getter built from the fixture response.
        self.response = fakeResponseFromFile("examples/main", None)
        self.restaurantIDsGetter = RestaurantIDsGetter(self.response)

    def testGetIDsFromMainPage(self):
        # getID is expected to return "6802" for this restaurant URL.
        actualId = self.restaurantIDsGetter.getID(
            "http://www.domiciliosbogota.com/domicilios-pan-pa-ya.html")
        # assertEqual, not the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(actualId, "6802")

    def testIfDoNotHaveIDReturnsNoID(self):
        # getID is expected to return the "NoID" placeholder for this URL.
        actualId = self.restaurantIDsGetter.getID(
            "http://www.domiciliosbogota.com/domicilios-cali-vea-castilla.html"
        )
        self.assertEqual(actualId, "NoID")
class RestautantIDsGetterTest(unittest.TestCase):
    """Exercises ``RestaurantIDsGetter.getID`` against the ``examples/main``
    fixture response."""

    def setUp(self):
        # Build the fixture response and the getter under test before each
        # test method runs.
        self.response = fakeResponseFromFile("examples/main", None)
        self.restaurantIDsGetter = RestaurantIDsGetter(self.response)

    def testGetIDsFromMainPage(self):
        # This restaurant URL is expected to resolve to the ID "6802".
        actualId = self.restaurantIDsGetter.getID(
            "http://www.domiciliosbogota.com/domicilios-pan-pa-ya.html")
        # The assertEquals alias is deprecated and no longer exists in
        # Python 3.12+; assertEqual works on every version.
        self.assertEqual(actualId, "6802")

    def testIfDoNotHaveIDReturnsNoID(self):
        # This URL is expected to yield the "NoID" placeholder.
        actualId = self.restaurantIDsGetter.getID(
            "http://www.domiciliosbogota.com/domicilios-cali-vea-castilla.html")
        self.assertEqual(actualId, "NoID")
def setUp(self):
    """Prepare the fixture response and the getter under test.

    Stores the result of ``fakeResponseFromFile("examples/main", None)`` on
    ``self.response`` and a ``RestaurantIDsGetter`` built from it on
    ``self.restaurantIDsGetter``.
    """
    fixtureResponse = fakeResponseFromFile("examples/main", None)
    self.response = fixtureResponse
    self.restaurantIDsGetter = RestaurantIDsGetter(fixtureResponse)
def parseMain(self, response):
    """Request every restaurant page linked from ``response``.

    Stores ``RestaurantIDsGetter(response)`` on ``self.restaurantIDsGetter``,
    then yields one ``Request`` per link whose URL matches the
    ``domicilios-`` pattern. Each request uses ``self.parseRestaurants`` as
    its callback. Nothing runs until the returned generator is iterated.

    :param response: page whose links are scanned.
    :returns: generator of ``Request`` objects.
    """
    self.restaurantIDsGetter = RestaurantIDsGetter(response)

    restaurantLinks = LinkExtractor(
        allow=r"http\:\/\/www\.domiciliosbogota\.com\/domicilios\-.*"
    ).extract_links(response)

    for restaurantLink in restaurantLinks:
        yield Request(restaurantLink.url, callback=self.parseRestaurants)