def parse(self, response):
    """Schedule one detail request for every distinct author id in ``zhihu_paris``.

    ``response`` is not inspected; the author ids come from the distinct
    ``author.id`` values of the ``zhihu_paris`` collection.

    Yields:
        A ``Request`` built from ``self.start_url`` formatted with the author
        id, handled by ``self.parse_detail`` and sent with ``dont_filter=True``.
    """
    author_ids = MongoDBUtils("zhihu_paris").distinctID("author.id")
    for author_id in author_ids:
        # The id "0" is never requested.
        # NOTE(review): presumably a placeholder for authors without a real
        # account - confirm against the crawled data.
        if author_id != "0":
            detail_url = self.start_url.format(author_id)
            yield Request(detail_url,
                          callback=self.parse_detail,
                          dont_filter=True)
class Location:
    """Tally the regions of the authors stored in one MongoDB collection.

    Author ids are read from the collection given to the constructor. The
    matching user documents are looked up in the ``zhihu_user`` collection and
    every location name they carry is mapped to a region label.
    """

    def __init__(self, collectionName):
        """Open the collection whose distinct ``author.id`` values are analysed.

        Args:
            collectionName: Collection name handed to ``MongoDBUtils``.
        """
        self.mongo = MongoDBUtils(collectionName)

    @staticmethod
    def _regions_for_name(name, city_map, provinces):
        """Map one raw location name to the region labels it should count as.

        Resolution order:
          1. ``name`` is a key of ``city_map``;
          2. ``name + "市"`` is a key of ``city_map``;
          3. every entry of ``provinces`` that occurs as a substring of ``name``;
          4. ``"其他"`` when none of the above produced a label.

        Args:
            name: Raw location name taken from a user document.
            city_map: Mapping from city name to region label.
            provinces: Iterable of region names tried as substrings of ``name``.

        Returns:
            A list of labels. It is empty when ``name`` is not a string, so a
            location entry without a usable name is simply not counted.
        """
        if not isinstance(name, str):
            return []
        if name in city_map:
            return [city_map.get(name)]
        suffixed = name + "市"
        if suffixed in city_map:
            return [city_map.get(suffixed)]
        matched = [reg for reg in provinces if reg in name]
        # Fall back to the catch-all label only when no province matched, so a
        # resolved location is not counted a second time as "其他".
        return matched or ["其他"]

    def analysis(self):
        """Count how many author locations fall into each region.

        Returns:
            A list of ``{"name": region, "value": count}`` dicts, in order of
            each region's first occurrence. The underlying counter is also
            printed to stdout.
        """
        author_ids = self.mongo.distinctID("author.id")
        user_store = MongoDBUtils("zhihu_user")
        region_counts = Counter()

        for uid in author_ids:
            # The id "0" is skipped.
            # NOTE(review): presumably a placeholder for authors without a
            # real account - confirm against the crawled data.
            if uid == "0":
                continue

            matches = user_store.searchByDoc({"_id": uid})
            try:
                user = matches[0]
            except IndexError:
                # No stored record for this author; treated the same as a
                # falsy first result.
                user = None
            if not user:
                continue

            # A missing or None "location" is handled like an empty list.
            for entry in user.get("location") or []:
                region_counts.update(
                    self._regions_for_name(entry.get("name"), city_province,
                                           province_list))

        print(region_counts)
        return [{"name": region, "value": total}
                for region, total in region_counts.items()]