class CitySpider:
    """Crawl shop information and shop comments for a single city.

    On construction the city's data is loaded via ``City.get()`` and the
    city's category tree is pre-processed into three views:

    * ``category_list`` -- nested structure in which a leaf is its ``text``
      string and a node with children is a one-key dict
      ``{text: [children...]}``.
    * ``coa_category`` -- coarse category names (top level of the tree).
    * ``fin_category`` -- fine-grained category names (leaves, depth-first).
    """

    def __init__(self, cityName, area=None):
        """Load the city and build the category views.

        :param cityName: name of the city to crawl
        :param area: stored as ``self.area``; no method of this class reads it
        """
        self.cityName = cityName
        self.area = area
        self.city = City(cityName,
                         searchDB=Database(MongoDB),
                         commentsDB=Database(MongoDB))
        self.city.get()

        self.category_list = []
        self.coa_category = []
        self.fin_category = []
        self.process_category(self.city.category, self.category_list)
        self.coarsness_category(self.category_list)
        self.fine_grained_category(self.category_list)

    def get_area(self, save=False):
        """Return the names of all districts of the city.

        Entries of ``self.city.locations`` whose ``text`` is the "all areas"
        placeholder are skipped. Entries from which ``text`` cannot be read
        are logged at debug level and skipped.

        :param save: when true, also store the list in the area database
            under the city's name
        :return: list of district names
        """
        area_list = []
        for item in self.city.locations:
            # Keep the try body minimal: only the lookup is expected to fail
            # (missing key, or an entry that is not subscriptable by str).
            try:
                name = item['text']
            except (KeyError, TypeError):
                logger.debug(f'获取城市分区失败:[城市:{self.cityName}]')
                continue
            if name == '全部地区':
                continue
            area_list.append(name)
        logger.info(f'获取 “{self.cityName}” 所有区成功.')

        if save:
            areaDB = init_area_db(Database(MongoDB))
            areaDB.save({'area': area_list}, self.cityName)
            logger.info(f'已将 “{self.cityName}” 所有区信息保存到数据库中.')
        return area_list

    def process_category(self, obj, category_list):
        """Convert a raw category tree into the nested list/dict form.

        Each entry of ``obj`` must provide ``'text'`` and may provide
        ``'children'`` (a list of entries of the same shape). A leaf is
        appended as its text; an entry with children is appended as
        ``{text: [...]}`` whose list is filled recursively.

        :param obj: iterable of raw category entries
        :param category_list: list to append to (mutated in place)
        :return: the same ``category_list`` object
        """
        for item in obj:
            if 'children' in item:
                children = []
                category_list.append({item['text']: children})
                self.process_category(item['children'], children)
            else:
                category_list.append(item['text'])
        return category_list

    def coarsness_category(self, category_list):
        """Append the top-level category names to ``self.coa_category``.

        Strings are taken as-is; for a dict its first key is used. Items of
        any other type are ignored.

        :param category_list: nested list as built by ``process_category``
        """
        for item in category_list:
            if isinstance(item, str):
                self.coa_category.append(item)
            elif isinstance(item, dict):
                self.coa_category.append(list(item)[0])

    def fine_grained_category(self, category_list):
        """Append all leaf category names to ``self.fin_category``.

        Walks the nested structure depth-first: strings are leaves, dicts are
        descended into through the list stored under their first key.

        :param category_list: nested list as built by ``process_category``
        """
        for item in category_list:
            if isinstance(item, str):
                self.fin_category.append(item)
            elif isinstance(item, dict):
                self.fine_grained_category(item[list(item)[0]])

    def get_category(self, save=False):
        """Return the city's shop categories.

        Note that the value saved is the processed ``self.category_list``
        while the value returned is the raw ``self.city.category``.

        :param save: when true, store ``self.category_list`` in the category
            database under the city's name
        :return: ``self.city.category``
        """
        logger.info(f'获取 “{self.cityName}” 所有店铺分类成功.')
        if save:
            categoryDB = init_category_db(Database(MongoDB))
            categoryDB.save({'category': self.category_list},
                            tname=self.cityName)
            logger.info(f'已将 “{self.cityName}” 所有店铺分类信息保存到数据库中.')
        return self.city.category

    def save_shop_info(self):
        """Search every (district, fine-grained category) pair and save it.

        Calls ``self.city.async_search`` with ``save=True`` for each
        combination, skipping the "all categories" placeholder.

        :return: ``True``
        """
        for area in self.get_area():
            for category in self.fin_category:
                if category == '全部分类':
                    continue
                self.city.async_search('',
                                       category=category,
                                       location=area,
                                       filter=None,
                                       sort='按人气排序',
                                       save=True,
                                       details=False,
                                       comments=False)
        return True

    def save_shop_comments(self):
        """Fetch and save shop comments for the city.

        Delegates to ``self.city.get_comments`` with all districts and all
        fine-grained categories.

        :return: ``True``
        """
        self.city.get_comments(self.get_area(), self.fin_category)
        return True