def __init__(self, city_name, city_id, api, *args, **kwargs):
    """Set up per-city state for a TripAdvisor city spider.

    NOTE(review): this module-level definition duplicates
    ``TripAdvisorCityRunnerSpider.__init__`` and reads ``self.base_url``,
    so it only works when bound to an object that already provides that
    attribute -- confirm whether this copy is still needed.

    :param city_name: name of the city, stored as ``self.city_name``.
    :param city_id: location id appended to ``self.base_url``; stored
        unchanged as ``self.city_id``.
    :param api: API key; stored as a string in ``self.api_key``.
    :param args: forwarded to ``Spider.__init__``.
    :param kwargs: forwarded to ``Spider.__init__``.
    """
    self.api_key = str(api)
    self.city_id = city_id
    self.city_name = city_name
    # Coerce to str (as is done for the API key) so that a numeric id does
    # not raise TypeError during concatenation.
    self.base_url += str(city_id)
    self.averages = {}
    self.top10_restaurants = {}
    self.db_manager = DBManager(self)
    Spider.__init__(self, *args, **kwargs)
    # Have spider_closed invoked when the spider_closed signal is sent.
    dispatcher.connect(self.spider_closed, signals.spider_closed)
class TripAdvisorCityRunnerSpider(Spider):
    """Collect average TripAdvisor ratings for one city and store them.

    One request is issued per location category listed in ``types``.
    ``parse`` records the mean rating of each response in ``self.averages``
    and every restaurant's rating in ``self.top10_restaurants``;
    ``spider_closed`` hands the collected values to the ``DBManager``
    created in ``__init__``.
    """

    name = 'tripadvisor_city_runner'
    base_url = 'http://api.tripadvisor.com/api/partner/2.0/location/'
    types = {0: 'attractions', 1: 'hotels', 2: 'restaurants'}

    def __init__(self, city_name, city_id, api, *args, **kwargs):
        """Store the city parameters and prepare result containers.

        :param city_name: name of the city, stored as ``self.city_name``.
        :param city_id: location id appended to ``base_url``; stored
            unchanged as ``self.city_id``.
        :param api: API key; stored as a string in ``self.api_key``.
        :param args: forwarded to ``Spider.__init__``.
        :param kwargs: forwarded to ``Spider.__init__``.
        """
        self.api_key = str(api)
        self.city_id = city_id
        self.city_name = city_name
        # ``+=`` on the class attribute creates an instance-level URL, so
        # the shared class value is left untouched.  The id is coerced to
        # str (like the API key) so a numeric id does not raise TypeError.
        self.base_url += str(city_id)
        self.averages = {}
        self.top10_restaurants = {}
        self.db_manager = DBManager(self)
        Spider.__init__(self, *args, **kwargs)
        # Connected without a sender, so the handler may be invoked for
        # spiders other than this one; spider_closed filters on that.
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def start_requests(self):
        """Return one request per category in ``types``.

        Each request targets ``<base_url>/<category>?key=<api_key>`` and
        carries the category name in ``meta['type']`` so ``parse`` can tell
        the responses apart.
        """
        requests = []
        # Sorted keys keep the original order: attractions, hotels,
        # restaurants.
        for index in sorted(self.types):
            category = self.types[index]
            request = Request(
                self.base_url + '/' + category + '?key=' + self.api_key,
                callback=self.parse)
            request.meta['type'] = category
            requests.append(request)
        return requests

    def parse(self, response):
        """Record the mean rating of the entries in a category response.

        The JSON body is expected to hold a list of entries under ``data``.
        The mean of their ``rating`` values is stored in
        ``self.averages[<category>]`` (``0.0`` when nothing is rated).  For
        the ``restaurants`` category each entry's rating is additionally
        stored in ``self.top10_restaurants`` keyed by the entry's ``name``.

        Entries whose rating is missing or not convertible to ``float`` are
        skipped so that one unrated entry does not discard the category.
        """
        payload = json.loads(response.body_as_unicode())
        category = response.meta['type']
        is_restaurant = category == 'restaurants'

        rating_sum = 0.0
        rated_count = 0
        for entry in payload['data']:
            try:
                rating = float(entry['rating'])
            except (KeyError, TypeError, ValueError):
                continue
            rating_sum += rating
            rated_count += 1
            if is_restaurant:
                self.top10_restaurants[entry['name']] = rating

        self.averages[category] = (
            rating_sum / rated_count if rated_count else 0.0)

    def spider_closed(self, spider):
        """Persist the collected ratings, then close the DB connection.

        :param spider: the spider that was closed.  Calls made on behalf of
            a different spider are ignored, because the handler is
            registered without a sender filter.

        If any category has no recorded average, nothing is inserted and a
        message is logged.  The connection is closed in every case once
        this spider itself has closed.
        """
        if spider is not self:
            return

        categories = ('attractions', 'hotels', 'restaurants')
        try:
            missing = [c for c in categories if c not in self.averages]
            if missing:
                self.log(
                    'No ratings collected for %s; skipping database insert '
                    'for city %s' % (', '.join(missing), self.city_id))
                return

            attractions = self.averages['attractions']
            hotels = self.averages['hotels']
            restaurants = self.averages['restaurants']
            total_avg = (attractions + hotels + restaurants) / 3.0

            self.db_manager.insert_capital_city(
                self.city_name, self.city_id,
                attractions, hotels, restaurants, total_avg)
            # .items() works on both Python 2 and 3 (iteritems is 2-only).
            for restaurant_name, rating in self.top10_restaurants.items():
                self.db_manager.insert_restaurant(
                    self.city_id, restaurant_name, rating)
        finally:
            # Always release the connection, even when an insert fails.
            self.db_manager.close_connection()