def province_parser(self, province_information):
    """Decode the matched JSON text and normalise every province record.

    ``province_information`` must expose ``group(0)`` returning the text of
    a JSON array of objects (presumably an ``re.Match`` -- confirm against
    the caller). Each record is modified in place:

    * the ``id``, ``tags`` and ``sort`` keys are removed; a record lacking
      any of them raises ``KeyError``;
    * space characters are removed from ``comment``;
    * ``provinceEnglishName`` is read from ``city_name_map`` using the
      record's ``provinceShortName`` (``KeyError`` if there is no entry);
    * ``country`` is read from ``country_type_map`` with ``.get`` using the
      record's ``countryType``, so a missing entry does not raise.

    Returns the list of updated records.
    """
    raw_json = province_information.group(0)
    records = json.loads(raw_json)
    for record in records:
        # Remove the fields that are not kept, in a fixed order.
        for unwanted in ('id', 'tags', 'sort'):
            record.pop(unwanted)
        comment = record['comment']
        record['comment'] = comment.replace(' ', '')
        short_name = record['provinceShortName']
        record['provinceEnglishName'] = city_name_map[short_name]['engName']
        country_type = record['countryType']
        record['country'] = country_type_map.get(country_type)
    return records
def province_parser(self, province_information):
    """Decode the matched JSON text and store each new province record.

    ``province_information`` must expose ``group(0)`` returning the text of
    a JSON array of objects (presumably an ``re.Match`` -- confirm against
    the caller). For every record:

    1. the ``id``, ``tags`` and ``sort`` keys are removed (``KeyError`` if
       one is absent) and space characters are removed from ``comment``;
    2. the cleaned record is passed to ``self.db.find_one`` on the
       ``DXYProvince`` collection; a truthy result skips the record
       (presumably meaning it is already stored -- confirm against the
       db wrapper);
    3. otherwise ``provinceEnglishName`` (from ``city_name_map``),
       ``crawlTime`` (``self.crawl_timestamp``) and ``country`` (from
       ``country_type_map.get``) are added, and the record is handed to
       ``self.db.insert`` on the same collection.

    Returns ``None``.
    """
    raw_json = province_information.group(0)
    for record in json.loads(raw_json):
        # Remove the fields that are not kept, in a fixed order.
        for unwanted in ('id', 'tags', 'sort'):
            record.pop(unwanted)
        comment = record['comment']
        record['comment'] = comment.replace(' ', '')

        # The lookup runs before the extra fields are attached.
        found = self.db.find_one(collection='DXYProvince', data=record)
        if found:
            continue

        short_name = record['provinceShortName']
        record['provinceEnglishName'] = city_name_map[short_name]['engName']
        record['crawlTime'] = self.crawl_timestamp
        country_type = record['countryType']
        record['country'] = country_type_map.get(country_type)
        self.db.insert(collection='DXYProvince', data=record)